From 6eee9e4be20acb23930bac7e29c5d589aaf164a7 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 30 Mar 2023 16:20:11 -0700 Subject: [PATCH 001/102] add option eager_reserve --- include/mimalloc.h | 6 ++++-- src/arena.c | 36 ++++++++++++++++++++++++++++++++++++ src/options.c | 11 +++++++++-- 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 564be236..c7b6d61e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -339,6 +339,7 @@ typedef enum mi_option_e { mi_option_max_warnings, mi_option_max_segment_reclaim, mi_option_destroy_on_exit, + mi_option_eager_reserve, _mi_option_last } mi_option_t; @@ -349,8 +350,9 @@ mi_decl_export void mi_option_disable(mi_option_t option); mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); -mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); -mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option); mi_decl_export void mi_option_set(mi_option_t option, long value); mi_decl_export void mi_option_set_default(mi_option_t option, long value); diff --git a/src/arena.c b/src/arena.c index 152f7bea..f92cf683 100644 --- a/src/arena.c +++ b/src/arena.c @@ -191,6 +191,28 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } +// allocate in a speficic arena +static void* mi_arena_alloc_in(mi_arena_id_t arena_id, int numa_node, size_t size, size_t alignment, + bool* commit, bool* large, bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) +{ + MI_UNUSED_RELEASE(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + const size_t bcount = mi_block_count_of_size(size); + const size_t arena_index = mi_arena_id_index(arena_id); + mi_assert_internal(arena_index < max_arena); + mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); + if (arena_index >= max_arena) return NULL; + + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + if (arena == NULL) return NULL; + if (arena->numa_node >= 0 && arena->numa_node != numa_node) return NULL; + if (!(*large) && arena->is_large) return NULL; + return mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); +} + + // allocate from an arena with fallback to the OS static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, @@ -263,6 +285,20 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; + + // otherwise, try to first eagerly reserve a new arena + size_t eager_reserve = mi_option_get_size(mi_option_eager_reserve); + eager_reserve = _mi_align_up(eager_reserve, MI_ARENA_BLOCK_SIZE); + if (eager_reserve > 0 && eager_reserve >= size && // eager 
reserve enabled and large enough? + req_arena_id == _mi_arena_id_none() && // not exclusive? + mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? + { + mi_arena_id_t arena_id = 0; + if (mi_reserve_os_memory_ex(eager_reserve, false /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { + p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + if (p != NULL) return p; + } + } } // finally, fall back to the OS diff --git a/src/options.c b/src/options.c index 6f6655f2..79b4e310 100644 --- a/src/options.c +++ b/src/options.c @@ -86,7 +86,8 @@ static mi_option_desc_t options[_mi_option_last] = { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. - { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + { 0, UNINIT, MI_OPTION(eager_reserve) } // reserve memory N KiB at a time (slower in v1.x due to regions) }; static void mi_option_init(mi_option_desc_t* desc); @@ -124,6 +125,12 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma return (x < min ? min : (x > max ? max : x)); } +mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { + mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_eager_reserve); + long x = mi_option_get(option); + return (x < 0 ? 
0 : (size_t)x * MI_KiB); +} + void mi_option_set(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); if (option < 0 || option >= _mi_option_last) return; @@ -517,7 +524,7 @@ static void mi_option_init(mi_option_desc_t* desc) { else { char* end = buf; long value = strtol(buf, &end, 10); - if (desc->option == mi_option_reserve_os_memory) { + if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_eager_reserve) { // this option is interpreted in KiB to prevent overflow of `long` if (*end == 'K') { end++; } else if (*end == 'M') { value *= MI_KiB; end++; } From 7cf60deb12ede18147e0d5ad2ed5a08a931fc3f0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 31 Mar 2023 14:07:59 -0700 Subject: [PATCH 002/102] wip: initial delayed purging of arenas --- include/mimalloc.h | 3 +- include/mimalloc/atomic.h | 11 +++ src/arena.c | 190 ++++++++++++++++++++++++++++++++++++-- src/bitmap.h | 4 + src/options.c | 7 +- 5 files changed, 202 insertions(+), 13 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index c7b6d61e..b25ae2ff 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -339,7 +339,8 @@ typedef enum mi_option_e { mi_option_max_warnings, mi_option_max_segment_reclaim, mi_option_destroy_on_exit, - mi_option_eager_reserve, + mi_option_arena_reserve, + mi_option_arena_purge_delay, _mi_option_last } mi_option_t; diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index fe79fbca..572e18ed 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -284,6 +284,17 @@ static inline bool mi_atomic_once( mi_atomic_once_t* once ) { return mi_atomic_cas_strong_acq_rel(once, &expected, 1); // try to set to 1 } +typedef _Atomic(uintptr_t) mi_atomic_guard_t; + +// Allows only one thread to execute at a time +#define mi_atomic_guard(guard) \ + uintptr_t _mi_guard_expected = 0; \ + for(bool _mi_guard_once = true; \ + _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,1); \ + (mi_atomic_store_release(guard,0), _mi_guard_once = false) ) + + + // Yield #if defined(__cplusplus) #include diff --git a/src/arena.c b/src/arena.c index d2ff9dd9..679407c9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -47,8 +47,10 @@ typedef struct mi_arena_s { bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL bool is_large; // large- or huge OS pages (always committed) _Atomic(size_t) search_idx; // optimization to start the search for free blocks + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -152,12 +154,22 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; - // claimed it! set the dirty bits (todo: no need for an atomic op here?) + // claimed it! 
void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); - *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *large = arena->is_large; *is_pinned = (arena->is_large || !arena->allow_decommit); + + // none of the claimed blocks should be scheduled for a decommit + if (arena->blocks_purge != NULL) { + // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `in_use`). + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index); + } + + // set the dirty bits (todo: no need for an atomic op here?) + *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + + // set commit state if (arena->blocks_committed == NULL) { // always committed *commit = true; @@ -275,7 +287,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset if (p != NULL) return p; // otherwise, try to first eagerly reserve a new arena - size_t eager_reserve = mi_option_get_size(mi_option_eager_reserve); + size_t eager_reserve = mi_option_get_size(mi_option_arena_reserve); eager_reserve = _mi_align_up(eager_reserve, MI_ARENA_BLOCK_SIZE); if (eager_reserve > 0 && eager_reserve >= size && // eager reserve enabled and large enough? req_arena_id == _mi_arena_id_none() && // not exclusive? @@ -317,6 +329,158 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { return arena->start; } +/* ----------------------------------------------------------- + Arena purge +----------------------------------------------------------- */ + +// either resets or decommits memory, returns true if the memory was decommitted. +static bool mi_os_purge(void* p, size_t size, mi_stats_t* stats) { + if (mi_option_is_enabled(mi_option_reset_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + _mi_os_decommit(p, size, stats); + return true; // decommitted + } + else { + _mi_os_reset(p, size, stats); + return false; // not decommitted + } +} + +// reset or decommit in an arena and update the committed/decommit bitmaps +static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(arena->blocks_committed != NULL); + mi_assert_internal(arena->blocks_purge != NULL); + mi_assert_internal(arena->allow_decommit); + const size_t size = blocks * MI_ARENA_BLOCK_SIZE; + void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); + const bool decommitted = mi_os_purge(p, size, stats); + // update committed bitmap + if (decommitted) { + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); + } +} + +// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. 
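Note: the scheduling function that follows does not purge immediately; it records an expiration time and lets a later call do the work, extending the deadline slightly when more purges keep arriving so that bursts of frees coalesce. A standalone sketch of that delay pattern (hypothetical names, C11 atomics; not mimalloc's internal API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

typedef int64_t msecs_t;

static msecs_t clock_now_msecs(void) {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (msecs_t)ts.tv_sec * 1000 + (msecs_t)(ts.tv_nsec / 1000000);
}

// Record (or extend) the expiration time instead of purging right away.
static void schedule_purge(_Atomic(msecs_t)* expire, msecs_t delay) {
  msecs_t cur = atomic_load_explicit(expire, memory_order_relaxed);
  if (cur != 0) {
    atomic_fetch_add(expire, delay / 10);             // already pending: push the deadline out a little
  }
  else {
    atomic_store(expire, clock_now_msecs() + delay);  // first request: set the deadline
  }
}

// A later call (for example on free or collect) checks whether the deadline has passed.
static bool purge_is_due(_Atomic(msecs_t)* expire, bool force) {
  msecs_t cur = atomic_load_explicit(expire, memory_order_relaxed);
  return (cur != 0 && (force || clock_now_msecs() >= cur));
}
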
+static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(arena->blocks_purge != NULL); + const long delay = mi_option_get(mi_option_arena_purge_delay); + if (_mi_preloading() || delay == 0) { + // decommit directly + mi_arena_purge(arena, bitmap_idx, blocks, stats); + } + else { + // schedule decommit + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (expire != 0) { + mi_atomic_add_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay + } + else { + mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); + } + _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); + } +} + +// return true if the full range was purged +static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) { + const size_t endidx = startidx + bitlen; + size_t bitidx = startidx; + bool all_purged = false; + while (bitidx < endidx) { + size_t count = 0; + while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) == 1) { + count++; + } + if (count > 0) { + // found range to be purged + const mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(idx, bitidx); + mi_arena_purge(arena, bitmap_idx, count, stats); + if (count == bitlen) { + all_purged = true; + } + } + bitidx += (count+1); + } + return all_purged; +} + +// returns true if anything was decommitted +static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) +{ + if (!arena->allow_decommit || arena->blocks_purge == NULL) return false; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (expire == 0) return false; + if (!force && expire > now) return false; + + // reset expire (if not already set concurrently) + mi_atomic_cas_strong_acq_rel(&arena->purge_expire, &expire, 0); + + // potential purges scheduled, walk through the bitmap + bool any_purged = false; + bool full_purge = true; + for (size_t i = 0; i < arena->field_count; i++) { + size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]); + if (purge != 0) { + size_t bitidx = 0; + while (bitidx < MI_BITMAP_FIELD_BITS) { + size_t bitlen = 1; + if ((purge & ((size_t)1 << bitidx)) != 0) { + while ((bitidx + bitlen < MI_BITMAP_FIELD_BITS) && + ((purge & ((size_t)1 << (bitidx + bitlen))) != 0)) { bitlen++; } + // found range of purgeable blocks + // try to claim the longest range of corresponding in_use bits + const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx); + while( bitlen > 0 ) { + if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) { + break; + } + bitlen--; + } + // claimed count bits at in_use + if (bitlen > 0) { + // read purge again now that we have the in_use bits + purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); + if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) { + full_purge = false; + } + any_purged = true; + } + else { + bitlen = 1; // make progress + } + } + bitidx += bitlen; + } + } + } + return any_purged; +} + +static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { + const long delay = mi_option_get(mi_option_arena_purge_delay); + if (_mi_preloading() || delay == 0) return; // nothing will be scheduled + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + if (max_arena == 0) return; + + // allow only one thread to purge at a time + static 
mi_atomic_guard_t purge_guard; + mi_atomic_guard(&purge_guard) + { + mi_msecs_t now = _mi_clock_now(); + size_t max_purge_count = (visit_all ? max_arena : 1); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (mi_arena_try_purge(arena, now, force, stats)) { + if (max_purge_count <= 1) break; + max_purge_count--; + } + } + } +} + + /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ @@ -339,6 +503,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); const size_t blocks = mi_block_count_of_size(size); + // checks if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); @@ -349,16 +514,17 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } + // potentially decommit if (!arena->allow_decommit || arena->blocks_committed == NULL) { mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) } else { mi_assert_internal(arena->blocks_committed != NULL); - _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails - // todo: use reset instead of decommit on windows? - _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_assert_internal(arena->blocks_purge != NULL); + mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); } + // and make it available to others again bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!all_inuse) { @@ -368,6 +534,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, } } + /* ----------------------------------------------------------- Add an arena. ----------------------------------------------------------- */ @@ -399,13 +566,17 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is is_committed = true; } + const bool allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory + const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); - const size_t bitmaps = (is_committed ? 2 : 3); + const size_t bitmaps = (allow_decommit ? 4 : 2); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; + // already zero'd due to os_alloc + // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); arena->exclusive = exclusive; arena->block_count = bcount; @@ -414,11 +585,12 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = is_large; arena->is_zero_init = is_zero; - arena->allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory + arena->allow_decommit = allow_decommit; + arena->purge_expire = 0; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - // the bitmaps are already zero initialized due to os_alloc + arena->blocks_purge = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap // initialize committed bitmap? if (arena->blocks_committed != NULL && is_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning diff --git a/src/bitmap.h b/src/bitmap.h index 435c5f0a..0326be31 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -71,6 +71,10 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel // Returns `true` if all `count` bits were 1 previously. bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); diff --git a/src/options.c b/src/options.c index 79b4e310..fb3a7520 100644 --- a/src/options.c +++ b/src/options.c @@ -87,7 +87,8 @@ static mi_option_desc_t options[_mi_option_last] = { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! - { 0, UNINIT, MI_OPTION(eager_reserve) } // reserve memory N KiB at a time (slower in v1.x due to regions) + { 0, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (slower in v1.x due to regions) + { 500, UNINIT, MI_OPTION(arena_purge_delay) } // reset/decommit delay in milli-seconds for arena allocation }; static void mi_option_init(mi_option_desc_t* desc); @@ -126,7 +127,7 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma } mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { - mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_eager_reserve); + mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_arena_reserve); long x = mi_option_get(option); return (x < 0 ? 
0 : (size_t)x * MI_KiB); } @@ -524,7 +525,7 @@ static void mi_option_init(mi_option_desc_t* desc) { else { char* end = buf; long value = strtol(buf, &end, 10); - if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_eager_reserve) { + if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_arena_reserve) { // this option is interpreted in KiB to prevent overflow of `long` if (*end == 'K') { end++; } else if (*end == 'M') { value *= MI_KiB; end++; } From 595add5e3d2102cb7edc1b8c9fdcd3e20efdae34 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 31 Mar 2023 20:51:35 -0700 Subject: [PATCH 003/102] wip: initial work on purgable arenas --- include/mimalloc/internal.h | 2 +- src/arena.c | 79 ++++++++++++++++++++----------------- src/bitmap.c | 16 +++++++- src/bitmap.h | 2 +- src/heap.c | 1 + src/init.c | 1 + src/region.c | 6 +-- 7 files changed, 65 insertions(+), 42 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2d8269e0..e73cbbba 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -117,7 +117,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinn void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id); bool _mi_arena_is_os_allocated(size_t arena_memid); - +void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); diff --git a/src/arena.c b/src/arena.c index 679407c9..8ab0a670 100644 --- a/src/arena.c +++ b/src/arena.c @@ -287,14 +287,15 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset if (p != NULL) return p; // otherwise, try to first eagerly reserve a new arena - size_t eager_reserve = mi_option_get_size(mi_option_arena_reserve); - eager_reserve = _mi_align_up(eager_reserve, MI_ARENA_BLOCK_SIZE); - if (eager_reserve > 0 && eager_reserve >= size && // eager reserve enabled and large enough? + size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); + arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); + if (arena_reserve > 0 && arena_reserve >= size && // eager reserve enabled and large enough? req_arena_id == _mi_arena_id_none() && // not exclusive? mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? { mi_arena_id_t arena_id = 0; - if (mi_reserve_os_memory_ex(eager_reserve, false /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { + const bool arena_commit = _mi_os_has_overcommit(); + if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; } @@ -383,7 +384,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t } } -// return true if the full range was purged +// return true if the full range was purged. 
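Note: `mi_arena_purge_range` below scans one bitmap word for maximal runs of set bits and purges each run as a unit. The same scanning loop in isolation (hypothetical helper, not part of mimalloc's bitmap API):

#include <stddef.h>
#include <stdio.h>

// Walk the bits of one word and report each maximal run of 1-bits in
// [0, nbits). The purge code applies the same loop to a `blocks_purge` field.
static void for_each_set_run(size_t bits, size_t nbits) {
  size_t bitidx = 0;
  while (bitidx < nbits) {
    size_t count = 0;
    while (bitidx + count < nbits && (bits & ((size_t)1 << (bitidx + count))) != 0) {
      count++;
    }
    if (count > 0) {
      printf("run of %zu bit(s) starting at bit %zu\n", count, bitidx);
    }
    bitidx += count + 1;   // skip over the zero bit (or step past the end)
  }
}
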
static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) { const size_t endidx = startidx + bitlen; size_t bitidx = startidx; @@ -401,12 +402,12 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, all_purged = true; } } - bitidx += (count+1); + bitidx += (count+1); // +1 to skip the zero bit (or end) } return all_purged; } -// returns true if anything was decommitted +// returns true if anything was purged static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) { if (!arena->allow_decommit || arena->blocks_purge == NULL) return false; @@ -425,35 +426,33 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi if (purge != 0) { size_t bitidx = 0; while (bitidx < MI_BITMAP_FIELD_BITS) { - size_t bitlen = 1; - if ((purge & ((size_t)1 << bitidx)) != 0) { - while ((bitidx + bitlen < MI_BITMAP_FIELD_BITS) && - ((purge & ((size_t)1 << (bitidx + bitlen))) != 0)) { bitlen++; } - // found range of purgeable blocks - // try to claim the longest range of corresponding in_use bits - const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx); - while( bitlen > 0 ) { - if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) { - break; - } - bitlen--; - } - // claimed count bits at in_use - if (bitlen > 0) { - // read purge again now that we have the in_use bits - purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); - if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) { - full_purge = false; - } - any_purged = true; - } - else { - bitlen = 1; // make progress - } + // find length 1 bit range + size_t bitlen = 0; + while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) { + bitlen++; } - bitidx += bitlen; - } - } + // try to claim the longest range of corresponding in_use bits + const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx); + while( bitlen > 0 ) { + if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) { + break; + } + bitlen--; + } + // claimed count bits at in_use + if (bitlen > 0) { + // read purge again now that we have the in_use bits + purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); + if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) { + full_purge = false; + } + any_purged = true; + // release claimed in_use bits again + _mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index); + } + bitidx += (bitlen+1); // +1 to skip the zero (or end) + } // while bitidx + } // purge != 0 } return any_purged; } @@ -532,9 +531,17 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, return; }; } + + // purge expired decommits + mi_arenas_try_purge(false, false, stats); } +void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats) { + MI_UNUSED(free_arenas); // todo + mi_arenas_try_purge(force_decommit, true, stats); +} + /* ----------------------------------------------------------- Add an arena. 
----------------------------------------------------------- */ @@ -566,7 +573,7 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is is_committed = true; } - const bool allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory + const bool allow_decommit = !is_large; // && !is_committed; // only allow decommit for initially uncommitted memory const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); diff --git a/src/bitmap.c b/src/bitmap.c index 8483de0b..0b6fec70 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -118,7 +118,7 @@ bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, c // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. -bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); @@ -153,6 +153,20 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size return ((field & mask) == mask); } +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + size_t expected = 0; + if (mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, mask)) return true; + if ((expected & mask) != 0) return false; + return mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask); +} + + bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); } diff --git a/src/bitmap.h b/src/bitmap.h index 0326be31..266f140a 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -69,7 +69,7 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. -bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); // Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. // Returns `true` if successful when all previous `count` bits were 0. 
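Note: `_mi_bitmap_try_claim` declared above is an all-or-nothing claim: it succeeds only if every bit in the requested range is currently 0. A minimal sketch of that idea with a plain C11 compare-and-swap loop (hypothetical names, single bitmap field only):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

// Atomically flip all bits in `mask` from 0 to 1; fail without changing
// anything if any of those bits is already set.
static bool try_claim_mask(_Atomic(size_t)* field, size_t mask) {
  size_t expected = atomic_load_explicit(field, memory_order_relaxed);
  do {
    if ((expected & mask) != 0) return false;   // someone else already holds part of the range
  } while (!atomic_compare_exchange_weak_explicit(
               field, &expected, expected | mask,
               memory_order_acq_rel, memory_order_acquire));
  return true;
}
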
diff --git a/src/heap.c b/src/heap.c index 84b0ec4f..31a8b660 100644 --- a/src/heap.c +++ b/src/heap.c @@ -159,6 +159,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions on program-exit (or shared library unload) if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { _mi_mem_collect(&heap->tld->os); + _mi_arena_collect(false,true,&heap->tld->stats); } } diff --git a/src/init.c b/src/init.c index 38f62f65..61245cd1 100644 --- a/src/init.c +++ b/src/init.c @@ -591,6 +591,7 @@ static void mi_cdecl mi_process_done(void) { if (mi_option_is_enabled(mi_option_destroy_on_exit)) { _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) _mi_mem_collect(&_mi_heap_main_get()->tld->os); // release all regions + _mi_arena_collect(true,true,&_mi_heap_main_get()->tld->stats); } if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { diff --git a/src/region.c b/src/region.c index 6c8ffb79..809b9ec8 100644 --- a/src/region.c +++ b/src/region.c @@ -289,7 +289,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* bool commit_zero = false; if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { // failed to commit! unclaim and return - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + _mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); return NULL; } if (commit_zero) *is_zero = true; @@ -306,7 +306,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* // some blocks are still reset mi_assert_internal(!info.x.is_large && !info.x.is_pinned); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); + _mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed bool reset_zero = false; _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); @@ -426,7 +426,7 @@ void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, s } // and unclaim - bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + bool all_unclaimed = _mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed); } } From c344bf5c20b7357fa84ad62d5bac7c0b8c7706fc Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 31 Mar 2023 21:18:50 -0700 Subject: [PATCH 004/102] wip: work on purgable arenas --- src/segment-cache.c | 2 +- src/segment.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index eeae1b50..58b98df3 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -16,7 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "./bitmap.h" // atomic bitmap -//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache +// #define MI_CACHE_DISABLE 1 // define to completely disable the segment cache #define MI_CACHE_FIELDS (16) #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit diff --git a/src/segment.c b/src/segment.c index 3e56d50f..8825ce52 100644 --- a/src/segment.c +++ b/src/segment.c @@ -11,7 +11,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // memset #include -#define MI_PAGE_HUGE_ALIGN (256*1024) +#define MI_USE_SEGMENT_CACHE 0 +#define MI_PAGE_HUGE_ALIGN (256*1024) static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); @@ -394,8 +395,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache - !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) +#if MI_USE_SEGMENT_CACHE + if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache + || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) +#endif { if (!segment->mem_is_pinned) { const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); @@ -809,10 +812,14 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; + #if MI_USE_SEGMENT_CACHE // get from cache? if (page_alignment == 0) { segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } + #else + MI_UNUSED(pdecommit_mask); + #endif // get from OS if (segment==NULL) { From 92556e0e629b0cb59183f264b800cd0f88b14e22 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 12:45:04 -0700 Subject: [PATCH 005/102] bump version for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index ddbef08b..923c0e14 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 8) -set(mi_version_patch 1) +set(mi_version_patch 2) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 564be236..0ecb056f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 181 // major + 2 digits minor +#define MI_MALLOC_VERSION 182 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 94a867869e98e5113ffda4bc91c2668969bb38f1 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 13:53:43 -0700 Subject: [PATCH 006/102] wip: purgeable arenas; fix asan warnings --- src/arena.c | 18 +++++++++++++----- src/os.c | 3 +++ src/prim/unix/prim.c | 3 ++- src/segment.c | 12 +++++++++--- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/arena.c b/src/arena.c index c4665a8f..c99267c8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -133,7 +133,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* { size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; @@ -189,6 +189,8 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren // no need to commit, but check if already fully committed *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } + + mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); return p; } @@ -300,7 +302,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; } - } + } } // finally, fall back to the OS @@ -356,10 +358,11 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, const size_t size = blocks * MI_ARENA_BLOCK_SIZE; void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); const bool decommitted = mi_os_purge(p, size, stats); + // clear the purged blocks + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap if (decommitted) { _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); - _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); } } @@ -520,14 +523,19 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - + // potentially decommit if (!arena->allow_decommit || arena->blocks_committed == NULL) { - mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) + mi_assert_internal(all_committed); } else { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); + if (!all_committed) { + // assume the entire range as no longer committed + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + } + // (delay) purge the entire range mi_arena_schedule_purge(arena, bitmap_idx, blocks, 
stats); } diff --git a/src/os.c b/src/os.c index 6145ccb3..8bcdbf7b 100644 --- a/src/os.c +++ b/src/os.c @@ -411,6 +411,9 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) if (err != 0) { _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } + else { + mi_track_mem_undefined(start, csize); + } return (err == 0); } diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 011ffa7c..09c76f90 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -380,7 +380,8 @@ int _mi_prim_commit(void* start, size_t size, bool commit) { } int _mi_prim_reset(void* start, size_t size) { - #if defined(MADV_FREE) + // note: disable the use of MADV_FREE since it leads to confusing stats :-( + #if 0 // defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; diff --git a/src/segment.c b/src/segment.c index 8825ce52..e4381e74 100644 --- a/src/segment.c +++ b/src/segment.c @@ -400,12 +400,18 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) #endif { - if (!segment->mem_is_pinned) { + // if not all committed, an arena may decommit the whole area, but that double counts + // the already decommitted parts; adjust for that in the stats. + if (!mi_commit_mask_is_full(&segment->commit_mask)) { const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + mi_assert_internal(size > csize); + if (size > csize) { + _mi_stat_increase(&_mi_stats_main.committed, size - csize); + } } _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, + mi_commit_mask_is_full(&segment->commit_mask) /* all committed? 
*/, tld->stats); } } From f5ab38f87b692371a5aba6ce7cb942ac20022321 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 15:06:09 -0700 Subject: [PATCH 007/102] wip: use purge throughout for segments and arenas; more agressive delays --- doc/mimalloc-doc.h | 8 +- include/mimalloc.h | 14 ++-- include/mimalloc/internal.h | 5 +- include/mimalloc/types.h | 9 ++- src/arena.c | 23 ++---- src/options.c | 16 ++-- src/os.c | 15 ++++ src/region.c | 8 +- src/segment-cache.c | 28 +++---- src/segment.c | 142 +++++++++++++++++++----------------- 10 files changed, 141 insertions(+), 127 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 815901f2..3e75243b 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -821,12 +821,12 @@ typedef enum mi_option_e { mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows) mi_option_segment_reset, ///< Experimental mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default) - mi_option_reset_decommits, ///< Experimental + mi_option_purge_decommits, ///< Experimental // v2.x specific options - mi_option_allow_decommit, ///< Enable decommitting memory (=on) - mi_option_decommit_delay, ///< Decommit page memory after N milli-seconds delay (25ms). - mi_option_segment_decommit_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms). + mi_option_allow_purge, ///< Enable decommitting memory (=on) + mi_option_purge_delay, ///< Decommit page memory after N milli-seconds delay (25ms). + mi_option_segment_purge_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms). _mi_option_last } mi_option_t; diff --git a/include/mimalloc.h b/include/mimalloc.h index 23ac05b7..6ade2e96 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -324,27 +324,27 @@ typedef enum mi_option_e { // some of the following options are experimental // (deprecated options are kept for binary backward compatibility with v1.x versions) mi_option_eager_commit, - mi_option_deprecated_eager_region_commit, - mi_option_reset_decommits, + mi_option_eager_arena_commit, + mi_option_purge_decommits, mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup mi_option_deprecated_segment_cache, mi_option_page_reset, - mi_option_abandoned_page_decommit, + mi_option_abandoned_page_purge, mi_option_deprecated_segment_reset, mi_option_eager_commit_delay, - mi_option_decommit_delay, + mi_option_purge_delay, mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. 
mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) mi_option_os_tag, mi_option_max_errors, mi_option_max_warnings, mi_option_max_segment_reclaim, - mi_option_allow_decommit, - mi_option_segment_decommit_delay, - mi_option_decommit_extend_delay, + mi_option_allow_purge, + mi_option_deprecated_segment_decommit_delay, + mi_option_purge_extend_delay, mi_option_destroy_on_exit, mi_option_arena_reserve, mi_option_arena_purge_delay, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2108a909..e97e7d91 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -93,6 +93,7 @@ size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); @@ -120,8 +121,8 @@ bool _mi_arena_is_os_allocated(size_t arena_memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_cache_free_all(mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index c7ddaaae..38b13883 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // 1MiB #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS @@ -379,9 +379,10 @@ typedef struct mi_segment_s { size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) - bool allow_decommit; - mi_msecs_t decommit_expire; - mi_commit_mask_t decommit_mask; + bool allow_decommit; + bool allow_purge; + mi_msecs_t purge_expire; + mi_commit_mask_t purge_mask; mi_commit_mask_t commit_mask; _Atomic(struct mi_segment_s*) abandoned_next; diff --git a/src/arena.c b/src/arena.c index c99267c8..1f0dd2f8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -297,7 +297,7 @@ void* 
_mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? { mi_arena_id_t arena_id = 0; - const bool arena_commit = _mi_os_has_overcommit(); + const bool arena_commit = _mi_os_has_overcommit() || mi_option_is_enabled(mi_option_eager_arena_commit); if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; @@ -336,20 +336,6 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { Arena purge ----------------------------------------------------------- */ -// either resets or decommits memory, returns true if the memory was decommitted. -static bool mi_os_purge(void* p, size_t size, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits) && // should decommit? - !_mi_preloading()) // don't decommit during preloading (unsafe) - { - _mi_os_decommit(p, size, stats); - return true; // decommitted - } - else { - _mi_os_reset(p, size, stats); - return false; // not decommitted - } -} - // reset or decommit in an arena and update the committed/decommit bitmaps static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_committed != NULL); @@ -357,7 +343,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->allow_decommit); const size_t size = blocks * MI_ARENA_BLOCK_SIZE; void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); - const bool decommitted = mi_os_purge(p, size, stats); + const bool decommitted = _mi_os_purge(p, size, stats); // clear the purged blocks _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap @@ -369,6 +355,8 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, // Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_purge != NULL); + if (!mi_option_is_enabled(mi_option_allow_purge)) return; + const long delay = mi_option_get(mi_option_arena_purge_delay); if (_mi_preloading() || delay == 0) { // decommit directly @@ -468,7 +456,8 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { const long delay = mi_option_get(mi_option_arena_purge_delay); - if (_mi_preloading() || delay == 0 /* || !mi_option_is_enabled(mi_option_allow_decommit) */) return; // nothing will be scheduled + if (_mi_preloading() || delay == 0 || !mi_option_is_enabled(mi_option_allow_purge)) return; // nothing will be scheduled + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); if (max_arena == 0) return; diff --git a/src/options.c b/src/options.c index 239ec308..44eee4b6 100644 --- a/src/options.c +++ b/src/options.c @@ -60,15 +60,15 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, + { 0, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates + { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge, abandoned_page_decommit) },// decommit free page memory when a thread terminates { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed @@ -77,23 +77,23 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds + { 10, UNINIT, MI_OPTION_LEGACY(purge_delay, decommit_delay) }, // page decommit delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 1, UNINIT, MI_OPTION_LEGACY(allow_purge, allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) + { 100, UNINIT, MI_OPTION(deprecated_segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments + { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! 
#if (MI_INTPTR_SIZE>4) { 1024L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time #else { 128L*1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 500, UNINIT, MI_OPTION(arena_purge_delay) } // reset/decommit delay in milli-seconds for arena allocation + { 100, UNINIT, MI_OPTION(arena_purge_delay) } // reset/decommit delay in milli-seconds for arena allocation }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 8bcdbf7b..20c6f8eb 100644 --- a/src/os.c +++ b/src/os.c @@ -436,6 +436,21 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat } */ +// either resets or decommits memory, returns true if the memory was decommitted. +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + _mi_os_decommit(p, size, stats); + return true; // decommitted + } + else { + _mi_os_reset(p, size, stats); + return false; // not decommitted + } +} + + // Protect a region in memory to be not accessible. static bool mi_os_protectx(void* addr, size_t size, bool protect) { // page align conservatively within the range diff --git a/src/region.c b/src/region.c index 809b9ec8..b01d4091 100644 --- a/src/region.c +++ b/src/region.c @@ -307,7 +307,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* mi_assert_internal(!info.x.is_large && !info.x.is_pinned); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); _mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed + if (*commit || !mi_option_is_enabled(mi_option_purge_decommits)) { // only if needed bool reset_zero = false; _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); if (reset_zero) *is_zero = true; @@ -415,7 +415,7 @@ void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, s // reset the blocks to reduce the working set. 
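Note: `_mi_os_purge` added in os.c above chooses between decommitting (dropping the pages and their commit charge, so the range must be committed again before reuse) and resetting (keeping the range committed but letting the OS reclaim the pages). A rough POSIX-only sketch of that distinction (hypothetical function; mimalloc itself goes through its `_mi_os_*`/prim layer and handles Windows and overcommit settings separately):

#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>

// Purge `size` bytes at `p`: returns true if the memory was decommitted
// (and must be re-committed before use), false if it was only reset.
static bool os_purge_sketch(void* p, size_t size, bool decommit) {
  if (decommit) {
    madvise(p, size, MADV_DONTNEED);     // drop the backing pages
    mprotect(p, size, PROT_NONE);        // and make the range inaccessible
    return true;
  }
  else {
    madvise(p, size, MADV_DONTNEED);     // pages may be reclaimed, range stays usable
    return false;
  }
}
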
if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead + mi_option_is_enabled(mi_option_purge_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); @@ -467,7 +467,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { -----------------------------------------------------------------------------*/ bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { + if (mi_option_is_enabled(mi_option_purge_decommits)) { return _mi_os_decommit(p, size, tld->stats); } else { @@ -476,7 +476,7 @@ bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { + if (mi_option_is_enabled(mi_option_purge_decommits)) { return _mi_os_commit(p, size, is_zero, tld->stats); } else { diff --git a/src/segment-cache.c b/src/segment-cache.c index 58b98df3..a98e6b07 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -29,7 +29,7 @@ typedef struct mi_cache_slot_s { size_t memid; bool is_pinned; mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; _Atomic(mi_msecs_t) expire; } mi_cache_slot_t; @@ -48,7 +48,7 @@ static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void mi_decl_noinline static void* mi_segment_cache_pop_ex( bool all_suitable, size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool large_allowed, + mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { @@ -96,7 +96,7 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( *is_pinned = slot->is_pinned; *is_zero = false; *commit_mask = slot->commit_mask; - *decommit_mask = slot->decommit_mask; + *purge_mask = slot->purge_mask; slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); @@ -107,9 +107,9 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( } -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); + return mi_segment_cache_pop_ex(false, size, commit_mask, purge_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) @@ -142,7 +142,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_decommit)) 
return; + if (!mi_option_is_enabled(mi_option_allow_purge)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); @@ -170,7 +170,7 @@ static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, // decommit committed parts // TODO: instead of decommit, we could also free to the OS? mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); + mi_commit_mask_create_empty(&slot->purge_mask); } _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } @@ -191,7 +191,7 @@ void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { void _mi_segment_cache_free_all(mi_os_tld_t* tld) { mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; bool is_pinned; bool is_zero; bool is_large; @@ -200,7 +200,7 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { void* p; do { // keep popping and freeing the memory - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, + p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &purge_mask, true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); if (p != NULL) { size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); @@ -210,7 +210,7 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { } while (p != NULL); } -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return false; @@ -257,13 +257,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->commit_mask = *commit_mask; - slot->decommit_mask = *decommit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { - long delay = mi_option_get(mi_option_segment_decommit_delay); + slot->purge_mask = *purge_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_purge)) { + long delay = mi_option_get(mi_option_arena_purge_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); + mi_commit_mask_create_empty(&slot->purge_mask); } else { mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); diff --git a/src/segment.c b/src/segment.c index e4381e74..65b21d94 100644 --- a/src/segment.c +++ b/src/segment.c @@ -14,7 +14,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); +static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- @@ -258,7 +258,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -390,14 +390,14 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(end, os_pagesize); } - // purge delayed decommits now? (no, leave it to the cache) - // mi_segment_delayed_decommit(segment,true,tld->stats); + // purge delayed decommits now? (no, leave it to the arena) + // mi_segment_delayed_purge(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); #if MI_USE_SEGMENT_CACHE if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache - || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) + || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->purge_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) #endif { // if not all committed, an arena may decommit the whole area, but that double counts @@ -478,7 +478,7 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // commit liberal, but decommit conservative uint8_t* start = NULL; @@ -488,6 +488,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); @@ -496,41 +497,47 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s mi_commit_mask_set(&segment->commit_mask, &mask); } else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + // purging mi_assert_internal((void*)start != (void*)segment); - //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); - - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - 
_mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (segment->allow_decommit) { - _mi_os_decommit(start, full_size, stats); // ok if this fails - } - mi_commit_mask_clear(&segment->commit_mask, &mask); + if (mi_option_is_enabled(mi_option_allow_purge)) { + if (segment->allow_decommit) { + const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit + if (decommitted) { + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + } + else if (segment->allow_purge) { + _mi_os_reset(start, full_size, stats); + } + } } // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + if (commit && mi_commit_mask_any_set(&segment->purge_mask, &mask)) { + segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } - // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, &mask); + // always undo delayed purges + mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow - if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed + if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); return mi_segment_commitx(segment,true,p,size,stats); } -static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (!segment->allow_decommit) return; - if (mi_option_get(mi_option_decommit_delay) == 0) { +static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (!segment->allow_purge) return; + if (mi_option_get(mi_option_purge_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } else { - // register for future decommit in the decommit mask + // register for future purge in the purge mask uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; @@ -538,39 +545,39 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit - mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask)); mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more - mi_commit_mask_set(&segment->decommit_mask, &cmask); + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); 
// only purge what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); - if (segment->decommit_expire == 0) { + if (segment->purge_expire == 0) { // no previous decommits, initialize now - segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); + segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } - else if (segment->decommit_expire <= now) { + else if (segment->purge_expire <= now) { // previous decommit mask already expired - if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { - mi_segment_delayed_decommit(segment, true, stats); + if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { + mi_segment_delayed_purge(segment, true, stats); } else { - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else { // previous decommit mask is not yet expired, increase the expiration by a bit. - segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); + segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; +static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { + if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); - if (!force && now < segment->decommit_expire) return; + if (!force && now < segment->purge_expire) return; - mi_commit_mask_t mask = segment->decommit_mask; - segment->decommit_expire = 0; - mi_commit_mask_create_empty(&segment->decommit_mask); + mi_commit_mask_t mask = segment->purge_mask; + segment->purge_expire = 0; + mi_commit_mask_create_empty(&segment->purge_mask); size_t idx; size_t count; @@ -583,7 +590,7 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st } } mi_commit_mask_foreach_end() - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); } @@ -596,7 +603,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) { } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? 
NULL : mi_span_queue_for(slice_count,tld)); @@ -616,8 +623,8 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - if (allow_decommit) { - mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + if (allow_purge) { + mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); } // and push it on the free page queue (if it was not a huge page) @@ -794,7 +801,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id, size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, - mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask, + mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* ppurge_mask, bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -821,10 +828,10 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment #if MI_USE_SEGMENT_CACHE // get from cache? if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, ppurge_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } #else - MI_UNUSED(pdecommit_mask); + MI_UNUSED(ppurge_mask); #endif // get from OS @@ -886,13 +893,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi bool is_zero = false; mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; mi_commit_mask_create_empty(&commit_mask); - mi_commit_mask_create_empty(&decommit_mask); + mi_commit_mask_create_empty(&purge_mask); // Allocate the segment from the OS mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask, + &segment_slices, &pre_size, &info_slices, &commit_mask, &purge_mask, &is_zero, &commit, tld, os_tld); if (segment == NULL) return NULL; @@ -908,21 +915,22 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - if (segment->allow_decommit) { - segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - segment->decommit_mask = decommit_mask; - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; + segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; + if (segment->allow_purge) { + segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); + segment->purge_mask = purge_mask; + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); #if MI_DEBUG>2 const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); mi_commit_mask_t commit_needed_mask; 
mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); + mi_assert_internal(!mi_commit_mask_any_set(&segment->purge_mask, &commit_needed_mask)); #endif } else { - segment->decommit_expire = 0; - mi_commit_mask_create_empty( &segment->decommit_mask ); + segment->purge_expire = 0; + mi_commit_mask_create_empty( &segment->purge_mask ); } // initialize segment info @@ -965,7 +973,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } else { mi_assert_internal(huge_page!=NULL); - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance @@ -1269,8 +1277,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // perform delayed decommits - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); + // perform delayed decommits (forcing is much slower on mstress) + mi_segment_delayed_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1459,7 +1467,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again + mi_segment_delayed_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1483,9 +1491,9 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // otherwise, decommit if needed and push on the visited list - // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. - mi_segment_delayed_decommit(segment, force, tld->stats); + // otherwise, purge if needed and push on the visited list + // note: forced purge can be expensive if many threads are destroyed/created as in mstress. 
+ mi_segment_delayed_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } @@ -1543,7 +1551,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); + mi_segment_delayed_purge(_mi_ptr_segment(page), false, tld->stats); return page; } From a9f42376b793449396bc5e2d430f40153fecbebc Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 16:17:02 -0700 Subject: [PATCH 008/102] small changes; make minimal commit most fine grained --- include/mimalloc/types.h | 2 +- src/alloc-aligned.c | 4 ++-- src/init.c | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 38b13883..ebbea391 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // most fine-grained #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index e79a2220..59f5a524 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -146,10 +146,10 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } -mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if (!_mi_is_power_of_two(alignment)) return NULL; + if mi_unlikely(!_mi_is_power_of_two(alignment)) return NULL; if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments diff --git a/src/init.c b/src/init.c index 0b4325e3..9378d028 100644 --- a/src/init.c +++ b/src/init.c @@ -37,6 +37,7 @@ const mi_page_t _mi_page_empty = { #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) +#if (MI_SMALL_WSIZE_MAX==128) #if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #elif (MI_PADDING>0) @@ -44,7 +45,9 @@ const mi_page_t _mi_page_empty = { #else #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } #endif - +#else +#error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX" +#endif // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } From d22a13c990c8faf0031f7b463c02663bf9d96b8c Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 17:58:28 -0700 Subject: [PATCH 009/102] wip: purgeable arenas, various fixes --- include/mimalloc/types.h | 2 +- src/arena.c | 14 +++++++++++--- src/options.c | 2 +- src/prim/unix/prim.c | 16 ---------------- src/segment.c | 10 
+++++----- 5 files changed, 18 insertions(+), 26 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ebbea391..962535e3 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // most fine-grained +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS diff --git a/src/arena.c b/src/arena.c index 1f0dd2f8..57db9f7e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -182,7 +182,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); - if (commit_zero) *is_zero = true; + if (commit_zero) { *is_zero = true; } } } else { @@ -190,7 +190,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } - mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); + // mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); return p; } @@ -297,7 +297,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? { mi_arena_id_t arena_id = 0; - const bool arena_commit = _mi_os_has_overcommit() || mi_option_is_enabled(mi_option_eager_arena_commit); + + bool arena_commit = _mi_os_has_overcommit(); + if (mi_option_get(mi_option_eager_arena_commit) == 1) { arena_commit = true; } + else if (mi_option_get(mi_option_eager_arena_commit) == 0) { arena_commit = false; } + if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; @@ -513,6 +517,9 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, return; } + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) + mi_track_mem_undefined(p,size); + // potentially decommit if (!arena->allow_decommit || arena->blocks_committed == NULL) { mi_assert_internal(all_committed); @@ -523,6 +530,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, if (!all_committed) { // assume the entire range as no longer committed _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_track_mem_noaccess(p,size); } // (delay) purge the entire range mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); diff --git a/src/options.c b/src/options.c index 44eee4b6..ca8bf5d8 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 2, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 09c76f90..8d027ebb 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -342,22 +342,6 @@ static void unix_mprotect_hint(int err) { int _mi_prim_commit(void* start, size_t size, bool commit) { - /* - #if 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) - if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } - } - #else - */ int err = 0; if (commit) { // commit: ensure we can access the area diff --git a/src/segment.c b/src/segment.c index 65b21d94..63e47742 100644 --- a/src/segment.c +++ b/src/segment.c @@ -756,7 +756,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(slice) + slice_count; size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + mi_segment_span_free(segment, next_index, next_count, false /* don't purge left-over part */, tld); slice->slice_count = (uint32_t)slice_count; } @@ -915,7 +915,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; + segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; if (segment->allow_purge) { segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); @@ -969,7 +969,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't purge */, tld); } else { mi_assert_internal(huge_page!=NULL); @@ -1585,7 +1585,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_assert_internal(psize - (aligned_p - start) >= size); uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_os_decommit(decommit_start, 
decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + _mi_os_reset(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } return page; @@ -1630,7 +1630,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc if (segment->allow_decommit) { const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + _mi_os_reset(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } } #endif From 33d7503fdb1b9fbe047756309455f4223eab55dd Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 19:57:26 -0700 Subject: [PATCH 010/102] rename to arena_eager_commit --- include/mimalloc.h | 2 +- src/arena.c | 4 ++-- src/options.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 6ade2e96..cb408acc 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -324,7 +324,7 @@ typedef enum mi_option_e { // some of the following options are experimental // (deprecated options are kept for binary backward compatibility with v1.x versions) mi_option_eager_commit, - mi_option_eager_arena_commit, + mi_option_arena_eager_commit, mi_option_purge_decommits, mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup diff --git a/src/arena.c b/src/arena.c index 57db9f7e..ca4c87a3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -299,8 +299,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_arena_id_t arena_id = 0; bool arena_commit = _mi_os_has_overcommit(); - if (mi_option_get(mi_option_eager_arena_commit) == 1) { arena_commit = true; } - else if (mi_option_get(mi_option_eager_arena_commit) == 0) { arena_commit = false; } + if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + else if (mi_option_get(mi_option_arena_eager_commit) == 0) { arena_commit = false; } if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); diff --git a/src/options.c b/src/options.c index ca8bf5d8..3eeccaae 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 2, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages From 09297ba8cf7d8dd8429440acfcf326754cc58a5a Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 11:46:02 -0700 Subject: [PATCH 011/102] wip: purgeable arenas --- src/arena.c | 7 ++-- src/options.c | 4 +- src/os.c | 5 ++- src/segment.c | 101 ++++++++++++++++++++++++++++---------------------- src/stats.c | 4 +- 5 files changed, 68 insertions(+), 53 deletions(-) diff --git a/src/arena.c b/src/arena.c index ca4c87a3..134a6227 100644 --- a/src/arena.c +++ b/src/arena.c @@ -163,7 +163,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren // none of the claimed blocks should be scheduled for a decommit if (arena->blocks_purge != NULL) { - // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `in_use`). + // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`). _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index); } @@ -176,7 +176,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren *commit = true; } else if (*commit) { - // arena not committed as a whole, but commit requested: ensure commit now + // commit requested, but the range may not be committed as a whole: ensure it is committed now bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { @@ -294,7 +294,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); if (arena_reserve > 0 && arena_reserve >= size && // eager reserve enabled and large enough? req_arena_id == _mi_arena_id_none() && // not exclusive? - mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? + mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) && // not too many arenas already? + !_mi_preloading() ) // and not before main runs { mi_arena_id_t arena_id = 0; diff --git a/src/options.c b/src/options.c index 3eeccaae..bb11b6a5 100644 --- a/src/options.c +++ b/src/options.c @@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, - { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N @@ -72,8 +72,6 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - #elif defined(_WIN32) - { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif diff --git a/src/os.c b/src/os.c index 20c6f8eb..f54e2513 100644 --- a/src/os.c +++ b/src/os.c @@ -437,7 +437,10 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat */ // either resets or decommits memory, returns true if the memory was decommitted. -bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) +{ + if (!mi_option_is_enabled(mi_option_allow_purge)) return false; + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? !_mi_preloading()) // don't decommit during preloading (unsafe) { diff --git a/src/segment.c b/src/segment.c index 63e47742..5a324adb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- @@ -391,7 +391,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { } // purge delayed decommits now? (no, leave it to the arena) - // mi_segment_delayed_purge(segment,true,tld->stats); + // mi_segment_try_purge(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); @@ -476,49 +476,32 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_commit_mask_create(bitidx, bitcount, cm); } - -static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { +static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); - // commit liberal, but decommit conservative + // commit liberal uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; - mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); - if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + mi_segment_commit_mask(segment, false /* conservative? 
*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size == 0) return true; - if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + if (!mi_commit_mask_all_set(&segment->commit_mask, &mask)) { // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - mi_commit_mask_set(&segment->commit_mask, &mask); + if (!_mi_os_commit(start, full_size, &is_zero, stats)) return false; + mi_commit_mask_set(&segment->commit_mask, &mask); } - else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { - // purging - mi_assert_internal((void*)start != (void*)segment); - if (mi_option_is_enabled(mi_option_allow_purge)) { - if (segment->allow_decommit) { - const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit - if (decommitted) { - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting - mi_commit_mask_clear(&segment->commit_mask, &mask); - } - } - else if (segment->allow_purge) { - _mi_os_reset(start, full_size, stats); - } - } - } - // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->purge_mask, &mask)) { + + // increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon. + if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) { segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } - // always undo delayed purges + + // always clear any delayed purges in our range (as they are either committed now) mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } @@ -528,13 +511,43 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - return mi_segment_commitx(segment,true,p,size,stats); + return mi_segment_commit(segment, p, size, stats); +} + +static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); + if (!segment->allow_purge) return true; + + // purge conservative + uint8_t* start = NULL; + size_t full_size = 0; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, true /* conservative? 
*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + + if (mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + // purging + mi_assert_internal((void*)start != (void*)segment); + mi_assert_internal(segment->allow_decommit); + const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit + if (decommitted) { + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + } + + // always clear any scheduled purges in our range + mi_commit_mask_clear(&segment->purge_mask, &mask); + return true; } static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_purge) return; + if (mi_option_get(mi_option_purge_delay) == 0) { - mi_segment_commitx(segment, false, p, size, stats); + mi_segment_purge(segment, p, size, stats); } else { // register for future purge in the purge mask @@ -551,26 +564,26 @@ static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); if (segment->purge_expire == 0) { - // no previous decommits, initialize now + // no previous purgess, initialize now segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } else if (segment->purge_expire <= now) { - // previous decommit mask already expired + // previous purge mask already expired if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { - mi_segment_delayed_purge(segment, true, stats); + mi_segment_try_purge(segment, true, stats); } else { segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else { - // previous decommit mask is not yet expired, increase the expiration by a bit. + // previous purge mask is not yet expired, increase the expiration by a bit. 
segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } -static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->purge_expire) return; @@ -586,7 +599,7 @@ static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); size_t size = count * MI_COMMIT_SIZE; - mi_segment_commitx(segment, false, p, size, stats); + mi_segment_purge(segment, p, size, stats); } } mi_commit_mask_foreach_end() @@ -916,7 +929,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; - segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; + segment->allow_purge = segment->allow_decommit && mi_option_is_enabled(mi_option_allow_purge); if (segment->allow_purge) { segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); segment->purge_mask = purge_mask; @@ -1278,7 +1291,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { } // perform delayed decommits (forcing is much slower on mstress) - mi_segment_delayed_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); + mi_segment_try_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1467,7 +1480,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again + mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1493,7 +1506,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) else { // otherwise, purge if needed and push on the visited list // note: forced purge can be expensive if many threads are destroyed/created as in mstress. 
- mi_segment_delayed_purge(segment, force, tld->stats); + mi_segment_try_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } @@ -1551,7 +1564,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_purge(_mi_ptr_segment(page), false, tld->stats); + mi_segment_try_purge(_mi_ptr_segment(page), false, tld->stats); return page; } diff --git a/src/stats.c b/src/stats.c index d2a31681..cc87513d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -111,7 +111,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); - + mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); @@ -331,7 +331,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); - mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); From 449aad0635616884b29d2c4243837b5db4dee428 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 12:15:57 -0700 Subject: [PATCH 012/102] wip: purgeable arenas; update option names and improve arena reservation --- include/mimalloc.h | 16 +++-- include/mimalloc/internal.h | 1 + src/arena.c | 121 +++++++++++++++++++++++++----------- src/options.c | 5 +- src/os.c | 19 ++++++ 5 files changed, 119 insertions(+), 43 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index acc1db68..032faa4b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -319,9 +319,9 @@ typedef enum mi_option_e { mi_option_show_stats, // print statistics on termination mi_option_verbose, // print verbose messages // the following options are experimental (see src/options.h) - mi_option_eager_commit, - mi_option_eager_region_commit, - mi_option_reset_decommits, + mi_option_segment_eager_commit, + mi_option_arena_eager_commit, + mi_option_purge_decommits, mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node @@ -331,7 +331,7 @@ typedef enum mi_option_e { mi_option_abandoned_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, - mi_option_reset_delay, + mi_option_purge_delay, mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. 
mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) mi_option_os_tag, @@ -341,7 +341,13 @@ typedef enum mi_option_e { mi_option_destroy_on_exit, mi_option_arena_reserve, mi_option_arena_purge_delay, - _mi_option_last + mi_option_allow_purge, + _mi_option_last, + // legacy options + mi_option_eager_commit = mi_option_segment_eager_commit, + mi_option_eager_region_commit = mi_option_arena_eager_commit, + mi_option_reset_decommits = mi_option_purge_decommits, + mi_option_reset_delay = mi_option_purge_delay } mi_option_t; diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e73cbbba..5904198f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -98,6 +98,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); diff --git a/src/arena.c b/src/arena.c index f7912a9e..64cb1624 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,3 +1,5 @@ + + /* ---------------------------------------------------------------------------- Copyright (c) 2019-2022, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the @@ -7,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. @@ -31,8 +33,11 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo Arena allocation ----------------------------------------------------------- */ -#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 32MiB -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB +// Block info: bit 0 contains the `in_use` bit, the upper bits the +// size in count of arena blocks. 
+typedef uintptr_t mi_block_info_t; +#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB #define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) // A memory arena descriptor @@ -103,7 +108,6 @@ static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_ } static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - mi_assert_internal(arena_memid != MI_MEMID_OS); *bitmap_index = (arena_memid >> 8); mi_arena_id_t id = (int)(arena_memid & 0x7F); *arena_index = mi_arena_id_index(id); @@ -111,7 +115,6 @@ static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_b } bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { - mi_assert_internal(arena_memid != MI_MEMID_OS); mi_arena_id_t id = (int)(arena_memid & 0x7F); bool exclusive = ((arena_memid & 0x80) != 0); return mi_arena_id_is_suitable(id, exclusive, request_arena_id); @@ -130,9 +133,9 @@ static size_t mi_block_count_of_size(size_t size) { ----------------------------------------------------------- */ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { - size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search + size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; @@ -143,9 +146,9 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { MI_UNUSED(arena_index); mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); @@ -162,7 +165,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n // none of the claimed blocks should be scheduled for a decommit if (arena->blocks_purge != NULL) { - // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `in_use`). + // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`). 
_mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index); } @@ -175,19 +178,21 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n *commit = true; } else if (*commit) { - // arena not committed as a whole, but commit requested: ensure commit now + // commit requested, but the range may not be committed as a whole: ensure it is committed now bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); - if (commit_zero) *is_zero = true; + if (commit_zero) { *is_zero = true; } } } else { // no need to commit, but check if already fully committed *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } + + // mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); return p; } @@ -291,15 +296,20 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); if (arena_reserve > 0 && arena_reserve >= size && // eager reserve enabled and large enough? req_arena_id == _mi_arena_id_none() && // not exclusive? - mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? + mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) && // not too many arenas already? + !_mi_preloading() ) // and not before main runs { mi_arena_id_t arena_id = 0; - const bool arena_commit = _mi_os_has_overcommit(); + + bool arena_commit = _mi_os_has_overcommit(); + if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + else if (mi_option_get(mi_option_arena_eager_commit) == 0) { arena_commit = false; } + if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; } - } + } } // finally, fall back to the OS @@ -319,7 +329,6 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, b return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); } - void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (size != NULL) *size = 0; size_t arena_index = mi_arena_id_index(arena_id); @@ -334,20 +343,6 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { Arena purge ----------------------------------------------------------- */ -// either resets or decommits memory, returns true if the memory was decommitted. -static bool mi_os_purge(void* p, size_t size, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits) && // should decommit? 
- !_mi_preloading()) // don't decommit during preloading (unsafe) - { - _mi_os_decommit(p, size, stats); - return true; // decommitted - } - else { - _mi_os_reset(p, size, stats); - return false; // not decommitted - } -} - // reset or decommit in an arena and update the committed/decommit bitmaps static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_committed != NULL); @@ -355,17 +350,20 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->allow_decommit); const size_t size = blocks * MI_ARENA_BLOCK_SIZE; void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); - const bool decommitted = mi_os_purge(p, size, stats); + const bool decommitted = _mi_os_purge(p, size, stats); + // clear the purged blocks + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap if (decommitted) { _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); - _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); } } // Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_purge != NULL); + if (!mi_option_is_enabled(mi_option_allow_purge)) return; + const long delay = mi_option_get(mi_option_arena_purge_delay); if (_mi_preloading() || delay == 0) { // decommit directly @@ -454,12 +452,19 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi } // while bitidx } // purge != 0 } + // if not fully purged, make sure to purge again in the future + if (!full_purge) { + const long delay = mi_option_get(mi_option_arena_purge_delay); + mi_msecs_t expected = 0; + mi_atomic_cas_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay); + } return any_purged; } static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { const long delay = mi_option_get(mi_option_arena_purge_delay); - if (_mi_preloading() || delay == 0) return; // nothing will be scheduled + if (_mi_preloading() || delay == 0 || !mi_option_is_enabled(mi_option_allow_purge)) return; // nothing will be scheduled + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); if (max_arena == 0) return; @@ -488,6 +493,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; + if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); @@ -513,14 +519,23 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - + + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) 
+ mi_track_mem_undefined(p,size); + // potentially decommit if (!arena->allow_decommit || arena->blocks_committed == NULL) { - mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) + mi_assert_internal(all_committed); } else { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); + if (!all_committed) { + // assume the entire range as no longer committed + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_track_mem_noaccess(p,size); + } + // (delay) purge the entire range mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); } @@ -641,6 +656,39 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe } +/* ----------------------------------------------------------- + Debugging +----------------------------------------------------------- */ + +static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) { + size_t inuse_count = 0; + for (size_t i = 0; i < field_count; i++) { + char buf[MI_BITMAP_FIELD_BITS + 1]; + uintptr_t field = mi_atomic_load_relaxed(&fields[i]); + for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) { + bool inuse = ((((uintptr_t)1 << bit) & field) != 0); + if (inuse) inuse_count++; + buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 'x' : '.'); + } + buf[MI_BITMAP_FIELD_BITS] = 0; + _mi_verbose_message("%s%s\n", prefix, buf); + } + return inuse_count; +} + +void mi_debug_show_arenas(void) mi_attr_noexcept { + size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arenas; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; + size_t inuse_count = 0; + _mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count); + inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count); + _mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count); + } +} + + /* ----------------------------------------------------------- Reserve a huge page arena. ----------------------------------------------------------- */ @@ -706,3 +754,4 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } + diff --git a/src/options.c b/src/options.c index d8b460d0..79e3560e 100644 --- a/src/options.c +++ b/src/options.c @@ -87,8 +87,9 @@ static mi_option_desc_t options[_mi_option_last] = { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! 
- { 0, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (slower in v1.x due to regions) - { 500, UNINIT, MI_OPTION(arena_purge_delay) } // reset/decommit delay in milli-seconds for arena allocation + { 0, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (disable for now in v1.x due to regions) + { 500, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation + { 1, UNINIT, MI_OPTION(allow_purge) } // allow decommit/reset to free (physical) memory back to the OS }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 1171a1ab..980bf34c 100644 --- a/src/os.c +++ b/src/os.c @@ -440,6 +440,25 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat } +// either resets or decommits memory, returns true if the memory was decommitted. +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) +{ + if (!mi_option_is_enabled(mi_option_allow_purge)) return false; + + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + _mi_os_decommit(p, size, stats); + return true; // decommitted + } + else { + _mi_os_reset(p, size, stats); + return false; // not decommitted + } +} + + + // Protect a region in memory to be not accessible. static bool mi_os_protectx(void* addr, size_t size, bool protect) { // page align conservatively within the range From 5c39fe7246328cd32ca7695c99b09c83633e0aef Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 12:57:18 -0700 Subject: [PATCH 013/102] decommit returns whether re-commit is needed --- include/mimalloc/prim.h | 11 ++++- src/os.c | 89 ++++++++++++++++++----------------------- src/prim/unix/prim.c | 61 ++++++++++++---------------- src/prim/wasi/prim.c | 10 ++++- src/prim/windows/prim.c | 18 ++++----- src/region.c | 3 +- 6 files changed, 94 insertions(+), 98 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 10378c92..c845f437 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -37,12 +37,21 @@ int _mi_prim_free(void* addr, size_t size ); // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// which will later be committed explicitly using `_mi_prim_commit`. // pre: !commit => !allow_large // try_alignment >= _mi_os_page_size() and a power of 2 int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr); // Commit memory. Returns error code or 0 on success. -int _mi_prim_commit(void* addr, size_t size, bool commit); +// For example, on Linux this would make the memory PROT_READ|PROT_WRITE. +int _mi_prim_commit(void* addr, size_t size); + +// Decommit memory. Returns error code or 0 on success. The `decommitted` result is true +// if the memory would need to be re-committed. For example, on Windows this is always true, +// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. +// pre: decommitted != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* decommitted); // Reset memory. The range keeps being accessible but the content might be reset. // Returns error code or 0 on success. 
diff --git a/src/os.c b/src/os.c index 980bf34c..88cdd837 100644 --- a/src/os.c +++ b/src/os.c @@ -356,60 +356,64 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -// Commit/Decommit memory. -// Usually commit is aligned liberal, while decommit is aligned conservative. -// (but not for the reset version where we want commit to be conservative as well) -static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { - // page align in the range, commit liberally, decommit conservative +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; if (is_zero != NULL) { *is_zero = false; } - size_t csize; - void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) - if (commit) { - _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&stats->commit_calls, 1); - } - else { - _mi_stat_decrease(&stats->committed, size); - } + _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit + _mi_stat_counter_increase(&stats->commit_calls, 1); - int err = _mi_prim_commit(start, csize, commit); + // page align range + size_t csize; + void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize); + if (csize == 0) return true; + + // commit + int err = _mi_prim_commit(start, csize); if (err != 0) { - _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); + _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); } -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { +static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); + mi_assert_internal(decommitted!=NULL); + _mi_stat_decrease(&stats->committed, size); + + // page align + size_t csize; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return true; + + // decommit + *decommitted = true; + int err = _mi_prim_decommit(start,csize,decommitted); + if (err != 0) { + _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } + mi_assert_internal(err == 0); + return (err == 0); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); + bool decommitted = true; + return mi_os_decommit_ex(addr, size, &decommitted, tld_stats); } -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); -} // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. 
// We page align to a conservative area inside the range to reset. -static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) { +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); - if (!reset) return true; // nothing to do on unreset! + _mi_stat_increase(&stats->reset, csize); #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN memset(start, 0, csize); // pretend it is eagerly reset @@ -422,25 +426,9 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) return (err == 0); } -// Signal to the OS that the address range is no longer in use -// but may be used later again. This will release physical memory -// pages and reduce swapping while keeping the memory committed. -// We page align to a conservative area inside the range to reset. -bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - return mi_os_resetx(addr, size, true, stats); -} -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); -} - - -// either resets or decommits memory, returns true if the memory was decommitted. +// either resets or decommits memory, returns true if the memory was decommitted +// (in the sense that it needs to be re-committed if the memory is re-used later on). bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { if (!mi_option_is_enabled(mi_option_allow_purge)) return false; @@ -448,8 +436,9 @@ bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? !_mi_preloading()) // don't decommit during preloading (unsafe) { - _mi_os_decommit(p, size, stats); - return true; // decommitted + bool decommitted; + mi_os_decommit_ex(p, size, &decommitted, stats); + return decommitted; } else { _mi_os_reset(p, size, stats); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 011ffa7c..d29dcc12 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -341,46 +341,37 @@ static void unix_mprotect_hint(int err) { } -int _mi_prim_commit(void* start, size_t size, bool commit) { - /* - #if 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) 
- if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } - } - #else - */ - int err = 0; - if (commit) { - // commit: ensure we can access the area - err = mprotect(start, size, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) - err = unix_madvise(start, size, MADV_DONTNEED); - #else - // decommit: just disable access (also used in debug and secure mode to trap on illegal access) - err = mprotect(start, size, PROT_NONE); - if (err != 0) { err = errno; } - #endif - } + + + +int _mi_prim_commit(void* start, size_t size) { + // commit: ensure we can access the area + int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } unix_mprotect_hint(err); return err; } +int _mi_prim_decommit(void* start, size_t size, bool* decommitted) { + int err = 0; + #if defined(MADV_DONTNEED) && !MI_DEBUG && !MI_SECURE + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) + *decommitted = false; + err = unix_madvise(start, size, MADV_DONTNEED); + #else + // decommit: just disable access (also used in debug and secure mode to trap on illegal access) + *decommitted = true; // needs recommit to reuse the memory + err = mprotect(start, size, PROT_NONE); + if (err != 0) { err = errno; } + #endif + return err; +} + int _mi_prim_reset(void* start, size_t size) { - #if defined(MADV_FREE) + // We always use MADV_DONTNEED even if it may be a bit more expensive as this + // guarantees that we see the actual rss reflected in tools like `top`. 
+ #if 0 && defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index cb3ce1a7..e843d99d 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -126,8 +126,14 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la // Commit/Reset/Protect //--------------------------------------------- -int _mi_prim_commit(void* addr, size_t size, bool commit) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(commit); +int _mi_prim_commit(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_decommit(void* addr, size_t size, bool* decommitted) { + MI_UNUSED(addr); MI_UNUSED(size); + *decommitted = false; return 0; } diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index e3dc33e3..72f086e3 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -257,15 +257,15 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la #pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) #endif -int _mi_prim_commit(void* addr, size_t size, bool commit) { - if (commit) { - void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); - return (p == addr ? 0 : (int)GetLastError()); - } - else { - BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); - return (ok ? 0 : (int)GetLastError()); - } +int _mi_prim_commit(void* addr, size_t size) { + void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); + return (p == addr ? 0 : (int)GetLastError()); +} + +int _mi_prim_decommit(void* addr, size_t size, bool* decommitted) { + BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); + *decommitted = true; // for safetly, assume always decommitted even in the case of an error. + return (ok ? 0 : (int)GetLastError()); } int _mi_prim_reset(void* addr, size_t size) { diff --git a/src/region.c b/src/region.c index 809b9ec8..36226eff 100644 --- a/src/region.c +++ b/src/region.c @@ -480,7 +480,8 @@ bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { return _mi_os_commit(p, size, is_zero, tld->stats); } else { - return _mi_os_unreset(p, size, is_zero, tld->stats); + // return _mi_os_unreset(p, size, is_zero, tld->stats); + return true; } } From 1d231be75829e46ab70a79825e19ab102b510523 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 13:05:48 -0700 Subject: [PATCH 014/102] clarify needs_recommit --- include/mimalloc/prim.h | 6 +++--- src/os.c | 24 ++++++++++++------------ src/prim/unix/prim.c | 6 +++--- src/prim/wasi/prim.c | 4 ++-- src/prim/windows/prim.c | 4 ++-- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index c845f437..b900cc95 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -47,11 +47,11 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la // For example, on Linux this would make the memory PROT_READ|PROT_WRITE. int _mi_prim_commit(void* addr, size_t size); -// Decommit memory. Returns error code or 0 on success. The `decommitted` result is true +// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true // if the memory would need to be re-committed. For example, on Windows this is always true, // but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. 
-// pre: decommitted != NULL -int _mi_prim_decommit(void* addr, size_t size, bool* decommitted); +// pre: needs_recommit != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); // Reset memory. The range keeps being accessible but the content might be reset. // Returns error code or 0 on success. diff --git a/src/os.c b/src/os.c index 88cdd837..78a7b5f7 100644 --- a/src/os.c +++ b/src/os.c @@ -377,10 +377,10 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats return (err == 0); } -static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_stats_t* tld_stats) { +static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - mi_assert_internal(decommitted!=NULL); + mi_assert_internal(needs_recommit!=NULL); _mi_stat_decrease(&stats->committed, size); // page align @@ -389,8 +389,8 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_sta if (csize == 0) return true; // decommit - *decommitted = true; - int err = _mi_prim_decommit(start,csize,decommitted); + *needs_recommit = true; + int err = _mi_prim_decommit(start,csize,needs_recommit); if (err != 0) { _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } @@ -399,8 +399,8 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_sta } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { - bool decommitted = true; - return mi_os_decommit_ex(addr, size, &decommitted, tld_stats); + bool needs_recommit; + return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats); } @@ -427,18 +427,18 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { } -// either resets or decommits memory, returns true if the memory was decommitted -// (in the sense that it needs to be re-committed if the memory is re-used later on). +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { if (!mi_option_is_enabled(mi_option_allow_purge)) return false; if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? - !_mi_preloading()) // don't decommit during preloading (unsafe) + !_mi_preloading()) // don't decommit during preloading (unsafe) { - bool decommitted; - mi_os_decommit_ex(p, size, &decommitted, stats); - return decommitted; + bool needs_recommit; + mi_os_decommit_ex(p, size, &needs_recommit, stats); + return needs_recommit; } else { _mi_os_reset(p, size, stats); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index d29dcc12..f9aa3b7c 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -352,16 +352,16 @@ int _mi_prim_commit(void* start, size_t size) { return err; } -int _mi_prim_decommit(void* start, size_t size, bool* decommitted) { +int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; #if defined(MADV_DONTNEED) && !MI_DEBUG && !MI_SECURE // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) // (on the other hand, MADV_FREE would be good enough.. 
it is just not reflected in the stats :-( ) - *decommitted = false; + *needs_recommit = false; err = unix_madvise(start, size, MADV_DONTNEED); #else // decommit: just disable access (also used in debug and secure mode to trap on illegal access) - *decommitted = true; // needs recommit to reuse the memory + *needs_recommit = true; // needs recommit to reuse the memory err = mprotect(start, size, PROT_NONE); if (err != 0) { err = errno; } #endif diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index e843d99d..57d1c690 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -131,9 +131,9 @@ int _mi_prim_commit(void* addr, size_t size) { return 0; } -int _mi_prim_decommit(void* addr, size_t size, bool* decommitted) { +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { MI_UNUSED(addr); MI_UNUSED(size); - *decommitted = false; + *needs_recommit = false; return 0; } diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 72f086e3..467a42e9 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -262,9 +262,9 @@ int _mi_prim_commit(void* addr, size_t size) { return (p == addr ? 0 : (int)GetLastError()); } -int _mi_prim_decommit(void* addr, size_t size, bool* decommitted) { +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); - *decommitted = true; // for safetly, assume always decommitted even in the case of an error. + *needs_recommit = true; // for safetly, assume always decommitted even in the case of an error. return (ok ? 0 : (int)GetLastError()); } From 77766e20a637117fb19f726be6f040492af75e97 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 14:58:06 -0700 Subject: [PATCH 015/102] use only arena's instead of regions --- CMakeLists.txt | 1 - ide/vs2022/mimalloc-override.vcxproj | 1 - ide/vs2022/mimalloc.vcxproj | 1 - include/mimalloc/internal.h | 3 + src/arena.c | 36 +- src/heap.c | 1 - src/init.c | 1 - src/options.c | 46 ++- src/region.c | 502 --------------------------- src/segment.c | 39 ++- src/static.c | 1 - 11 files changed, 81 insertions(+), 551 deletions(-) delete mode 100644 src/region.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 9550f77f..a9e098c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,6 @@ set(mi_sources src/os.c src/page.c src/random.c - src/region.c src/segment.c src/stats.c src/prim/prim.c) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 50a3d6b9..81a3fc70 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -247,7 +247,6 @@ true true - diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 9a7bf18c..d7e147b8 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -226,7 +226,6 @@ true true - true diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5904198f..b9fe5453 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -119,7 +119,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_o bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id); bool _mi_arena_is_os_allocated(size_t arena_memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); +bool _mi_arena_contains(const void* p); +/* // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, 
mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); @@ -132,6 +134,7 @@ bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); void _mi_mem_collect(mi_os_tld_t* tld); +*/ // "segment.c" mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); diff --git a/src/arena.c b/src/arena.c index 64cb1624..724fbaf4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,6 +128,10 @@ static size_t mi_block_count_of_size(size_t size) { return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } +static size_t mi_arena_block_size(size_t bcount) { + return (bcount * MI_ARENA_BLOCK_SIZE); +} + /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ @@ -158,7 +162,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! - void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); + void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); *large = arena->is_large; *is_pinned = (arena->is_large || !arena->allow_decommit); @@ -183,7 +187,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { bool commit_zero; - _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); + _mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats); if (commit_zero) { *is_zero = true; } } } @@ -192,7 +196,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } - // mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); + // mi_track_mem_undefined(p,mi_arena_block_size(needed_bcount)); return p; } @@ -207,7 +211,7 @@ static void* mi_arena_alloc_in(mi_arena_id_t arena_id, int numa_node, size_t siz const size_t bcount = mi_block_count_of_size(size); const size_t arena_index = mi_arena_id_index(arena_id); mi_assert_internal(arena_index < max_arena); - mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); + mi_assert_internal(size <= mi_arena_block_size(bcount)); if (arena_index >= max_arena) return NULL; mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); @@ -228,7 +232,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); const size_t bcount = mi_block_count_of_size(size); if mi_likely(max_arena == 0) return NULL; - mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); + mi_assert_internal(size <= mi_arena_block_size(bcount)); size_t arena_index = mi_arena_id_index(req_arena_id); if (arena_index < MI_MAX_ARENAS) { @@ -335,7 +339,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (arena_index >= MI_MAX_ARENAS) return NULL; mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; - if (size != NULL) *size = 
arena->block_count * MI_ARENA_BLOCK_SIZE; + if (size != NULL) { *size = mi_arena_block_size(arena->block_count); } return arena->start; } @@ -348,8 +352,8 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); mi_assert_internal(arena->allow_decommit); - const size_t size = blocks * MI_ARENA_BLOCK_SIZE; - void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); + const size_t size = mi_arena_block_size(blocks); + void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx)); const bool decommitted = _mi_os_purge(p, size, stats); // clear the purged blocks _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); @@ -557,6 +561,22 @@ void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats) mi_arenas_try_purge(force_decommit, true, stats); } + +bool _mi_arena_contains(const void* p) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { + return true; + } + } + return false; +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return _mi_arena_contains(p); // todo: extend to track os allocated memory as well +} + /* ----------------------------------------------------------- Add an arena. ----------------------------------------------------------- */ diff --git a/src/heap.c b/src/heap.c index 31a8b660..99316bb8 100644 --- a/src/heap.c +++ b/src/heap.c @@ -158,7 +158,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions on program-exit (or shared library unload) if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - _mi_mem_collect(&heap->tld->os); _mi_arena_collect(false,true,&heap->tld->stats); } } diff --git a/src/init.c b/src/init.c index 61245cd1..8c79561f 100644 --- a/src/init.c +++ b/src/init.c @@ -590,7 +590,6 @@ static void mi_cdecl mi_process_done(void) { // or C-runtime termination code. if (mi_option_is_enabled(mi_option_destroy_on_exit)) { _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_mem_collect(&_mi_heap_main_get()->tld->os); // release all regions _mi_arena_collect(true,true,&_mi_heap_main_get()->tld->stats); } diff --git a/src/options.c b/src/options.c index 79e3560e..5d4af7ba 100644 --- a/src/options.c +++ b/src/options.c @@ -41,10 +41,11 @@ typedef struct mi_option_desc_s { mi_init_t init; // is it initialized yet? (from the environment) mi_option_t option; // for debugging: the option index should match the option const char* name; // option name without `mimalloc_` prefix + const char* legacy_name; // potential legacy option name } mi_option_desc_t; -#define MI_OPTION(opt) mi_option_##opt, #opt -#define MI_OPTION_DESC(opt) {0, UNINIT, MI_OPTION(opt) } +#define MI_OPTION(opt) mi_option_##opt, #opt, NULL +#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy static mi_option_desc_t options[_mi_option_last] = { @@ -58,14 +59,9 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. 
- { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) - #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? - { 0, UNINIT, MI_OPTION(eager_region_commit) }, - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory - #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED - #endif + { 1, UNINIT, MI_OPTION_LEGACY(segment_eager_commit,eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N @@ -87,7 +83,12 @@ static mi_option_desc_t options[_mi_option_last] = { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! - { 0, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (disable for now in v1.x due to regions) + #if (MI_INTPTR_SIZE>4) + { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time + #else + { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, + #endif + { 500, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation { 1, UNINIT, MI_OPTION(allow_purge) } // allow decommit/reset to free (physical) memory back to the OS }; @@ -504,18 +505,27 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment + char s[64 + 1]; char buf[64+1]; _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); _mi_strlcat(buf, desc->name, sizeof(buf)); - char s[64+1]; - if (mi_getenv(buf, s, sizeof(s))) { - size_t len = _mi_strnlen(s,64); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; + bool found = mi_getenv(buf, s, sizeof(s)); + if (!found && desc->legacy_name != NULL) { + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + found = mi_getenv(buf, s, sizeof(s)); + if (found) { + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name); + } + } + + if (found) { + size_t len = _mi_strnlen(s, sizeof(buf) - 1); for (size_t i = 0; i < len; i++) { buf[i] = _mi_toupper(s[i]); } buf[len] = 0; - if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { + if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) { desc->value = 1; desc->init = INITIALIZED; } @@ -546,11 +556,11 @@ static void mi_option_init(mi_option_desc_t* desc) { // if the 'mimalloc_verbose' env var has a bogus value we'd never know // (since the value defaults to 'off') so in that case briefly enable verbose desc->value = 1; - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + _mi_warning_message("environment option mimalloc_%s has an 
invalid value.\n", desc->name); desc->value = 0; } else { - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); } } } diff --git a/src/region.c b/src/region.c deleted file mode 100644 index 36226eff..00000000 --- a/src/region.c +++ /dev/null @@ -1,502 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper - way to reuse memory. -3. This layer allows for NUMA aware allocation. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "mimalloc/atomic.h" - -#include // memset - -#include "bitmap.h" - -// os.c -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) -#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) - -// Region info -typedef union mi_region_info_u { - size_t value; - struct { - bool valid; // initialized? 
- bool is_large:1; // allocated in fixed large/huge OS pages - bool is_pinned:1; // pinned memory cannot be decommitted - short numa_node; // the associated NUMA node (where -1 means no associated node) - } x; -} mi_region_info_t; - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - _Atomic(size_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - _Atomic(size_t) arena_memid; // if allocated from a (huge page) arena - _Atomic(size_t) padding; // round to 8 fields (needs to be atomic for msvc, see issue #508) -} mem_region_t; - -// The region map -static mem_region_t regions[MI_REGION_MAX]; - -// Allocated regions -static _Atomic(size_t) regions_count; // = 0; - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - return _mi_divide_up(size, MI_SEGMENT_SIZE); -} - -/* -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} -*/ - -// Return if a pointer points into a region reserved by us. -mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); - mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); -} - -static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { - mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); - size_t idx = region - regions; - mi_assert_internal(®ions[idx] == region); - return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; -} - -static size_t mi_memid_create_from_arena(size_t arena_memid) { - return (arena_memid << 1) | 1; -} - - -static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if ((id&1)==1) { - if (arena_memid != NULL) *arena_memid = (id>>1); - return true; - } - else { - size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; - *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; - *region = ®ions[idx]; - return false; - } -} - - -/* ---------------------------------------------------------------------------- - Allocate a region is allocated from the OS (or an arena) ------------------------------------------------------------------------------*/ - -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // not 
out of regions yet? - if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - - // try to allocate a fresh region from the OS - bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; - bool is_pinned = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); - if (start == NULL) return false; - mi_assert_internal(!(region_large && !allow_large)); - mi_assert_internal(!region_large || region_commit); - - // claim a fresh slot - const size_t idx = mi_atomic_increment_acq_rel(®ions_count); - if (idx >= MI_REGION_MAX) { - mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats); - _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); - return false; - } - - // allocated, initialize and claim the initial blocks - mem_region_t* r = ®ions[idx]; - r->arena_memid = arena_memid; - mi_atomic_store_release(&r->in_use, (size_t)0); - mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_store_release(&r->reset, (size_t)0); - *bit_idx = 0; - _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_store_ptr_release(void,&r->start, start); - - // and share it - mi_region_info_t info; - info.value = 0; // initialize the full union to zero - info.x.valid = true; - info.x.is_large = region_large; - info.x.is_pinned = is_pinned; - info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_store_release(&r->info, info.value); // now make it available to others - *region = r; - return true; -} - -/* ---------------------------------------------------------------------------- - Try to claim blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { - // initialized at all? - mi_region_info_t info; - info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); - if (info.value==0) return false; - - // numa correct - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.x.numa_node; - if (rnode >= 0 && rnode != numa_node) return false; - } - - // check allow-large - if (!allow_large && info.x.is_large) return false; - - return true; -} - - -static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // try all regions for a free slot - const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - mem_region_t* r = ®ions[idx]; - // if this region suits our demand (numa node matches, large OS page matches) - if (mi_region_is_suitable(r, numa_node, allow_large)) { - // then try to atomically claim a segment(s) in this region - if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { - tld->region_idx = idx; // remember the last found position - *region = r; - return true; - } - } - } - return false; -} - - -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); - mem_region_t* region; - mi_bitmap_index_t bit_idx; - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) { - // out of regions or memory - return NULL; - } - } - - // ------------------------------------------------ - // found a region and claimed `blocks` at `bit_idx`, initialize them now - mi_assert_internal(region != NULL); - mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); - mi_assert_internal(!(info.x.is_large && !*large)); - mi_assert_internal(start != NULL); - - *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *large = info.x.is_large; - *is_pinned = info.x.is_pinned; - *memid = mi_memid_create(region, bit_idx); - void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - - // commit - if (*commit) { - // ensure commit - bool any_uncommitted; - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); - if (any_uncommitted) { - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - bool commit_zero = false; - if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { - // failed to commit! 
unclaim and return - _mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - return NULL; - } - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); - } - mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); - - // unreset reset blocks - if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { - // some blocks are still reset - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - _mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; - } - } - mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - - // and return the allocation - mi_assert_internal(p != NULL); - return p; -} - - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. -// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = 0; - *is_zero = false; - *is_pinned = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - if (size == 0) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // allocate from regions if possible - void* p = NULL; - size_t arena_memid; - const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); - if (p == NULL) { - _mi_warning_message("unable to allocate from region: size %zu\n", size); - } - } - if (p == NULL) { - // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - } - - if (p != NULL) { - mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN - if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed - #endif - } - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. 
-
-void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
- mi_assert_internal(size > 0 && tld != NULL);
- if (p==NULL) return;
- if (size==0) return;
- size = _mi_align_up(size, _mi_os_page_size());
-
- size_t arena_memid = 0;
- mi_bitmap_index_t bit_idx;
- mem_region_t* region;
- if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
- // was a direct arena allocation, pass through
- _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats);
- }
- else {
- // allocated in a region
- mi_assert_internal(align_offset == 0);
- mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
- const size_t blocks = mi_region_block_count(size);
- mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
- mi_region_info_t info;
- info.value = mi_atomic_load_acquire(&region->info);
- mi_assert_internal(info.value != 0);
- void* blocks_start = mi_region_blocks_start(region, bit_idx);
- mi_assert_internal(blocks_start == p); // not a pointer in our area?
- mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
- if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
-
- // committed?
- if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
- _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL);
- }
-
- if (any_reset) {
- // set the is_reset bits if any pages were reset
- _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL);
- }
-
- // reset the blocks to reduce the working set.
- if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
- && (mi_option_is_enabled(mi_option_eager_commit) ||
- mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
- {
- bool any_unreset;
- _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
- if (any_unreset) {
- _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
- _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
- }
- }
-
- // and unclaim
- bool all_unclaimed = _mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
- mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed);
- }
-}
-
-
-/* ----------------------------------------------------------------------------
- collection
------------------------------------------------------------------------------*/
-void _mi_mem_collect(mi_os_tld_t* tld) {
- // free every region that has no segments in use. 
- size_t rcount = mi_atomic_load_relaxed(&regions_count);
- for (size_t i = 0; i < rcount; i++) {
- mem_region_t* region = &regions[i];
- if (mi_atomic_load_relaxed(&region->info) != 0) {
- // if no segments used, try to claim the whole region
- size_t m = mi_atomic_load_relaxed(&region->in_use);
- while (m == 0 && !mi_atomic_cas_weak_release(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
- if (m == 0) {
- // on success, free the whole region
- uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&regions[i].start);
- size_t arena_memid = mi_atomic_load_relaxed(&regions[i].arena_memid);
- size_t commit = mi_atomic_load_relaxed(&regions[i].commit);
- memset((void*)&regions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning
- // and release the whole region
- mi_atomic_store_release(&region->info, (size_t)0);
- if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
- _mi_abandoned_await_readers(); // ensure no pending reads
- _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats);
- }
- }
- }
- }
-}
-
-
-/* ----------------------------------------------------------------------------
- Other
------------------------------------------------------------------------------*/
-
-bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
- if (mi_option_is_enabled(mi_option_reset_decommits)) {
- return _mi_os_decommit(p, size, tld->stats);
- }
- else {
- return _mi_os_reset(p, size, tld->stats);
- }
-}
-
-bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
- if (mi_option_is_enabled(mi_option_reset_decommits)) {
- return _mi_os_commit(p, size, is_zero, tld->stats);
- }
- else {
- // return _mi_os_unreset(p, size, is_zero, tld->stats);
- return true;
- }
-}
-
-bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
- return _mi_os_commit(p, size, is_zero, tld->stats);
-}
-
-bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
- return _mi_os_decommit(p, size, tld->stats);
-}
-
-bool _mi_mem_protect(void* p, size_t size) {
- return _mi_os_protect(p, size);
-}
-
-bool _mi_mem_unprotect(void* p, size_t size) {
- return _mi_os_unprotect(p, size);
-}
diff --git a/src/segment.c b/src/segment.c
index 56b7a06c..af325fe7 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -177,10 +177,10 @@ static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld)
 static void mi_segment_protect_range(void* p, size_t size, bool protect) {
 if (protect) {
- _mi_mem_protect(p, size);
+ _mi_os_protect(p, size);
 }
 else {
- _mi_mem_unprotect(p, size);
+ _mi_os_unprotect(p, size);
 }
 }
 
@@ -202,7 +202,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t*
 if (protect && !segment->mem_is_committed) {
 if (protect) {
 // ensure secure page is committed
- if (_mi_mem_commit(start, os_psize, NULL, tld)) { // if this fails that is ok (as it is an unaccessible page)
+ if (_mi_os_commit(start, os_psize, NULL, tld->stats)) { // if this fails that is ok (as it is an unaccessible page)
 mi_segment_protect_range(start, os_psize, protect);
 }
 }
@@ -238,26 +238,29 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m
 page->is_reset = true;
 mi_assert_internal(size <= psize);
 size_t reset_size = ((size == 0 || size > psize) ? 
psize : size); - if (reset_size > 0) _mi_mem_reset(start, reset_size, tld->os); + if (reset_size > 0) { _mi_os_reset(start, reset_size, tld->stats); } } static bool mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + MI_UNUSED(size); MI_UNUSED(tld); mi_assert_internal(page->is_reset); mi_assert_internal(page->is_committed); mi_assert_internal(!segment->mem_is_pinned); if (segment->mem_is_pinned || !page->is_committed || !page->is_reset) return true; page->is_reset = false; + /* size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); size_t unreset_size = (size == 0 || size > psize ? psize : size); - bool is_zero = false; - bool ok = true; - if (unreset_size > 0) { - ok = _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); - } - if (is_zero) page->is_zero_init = true; - return ok; + */ + // bool is_zero = false; + // bool ok = true; + // if (unreset_size > 0) { + // ok = _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); + // } + // if (is_zero) page->is_zero_init = true; + return true; } @@ -477,7 +480,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false; } - _mi_mem_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, fully_committed, any_reset, tld->os); + + _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, fully_committed, tld->stats); } // called by threads that are terminating to free cached segments @@ -510,17 +514,18 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme *segment_size = *segment_size + (align_offset - pre_size); } - mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); + // mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); + mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, _mi_arena_id_none(), &memid, tld_os); if (segment == NULL) return NULL; // failed to allocate if (!(*commit)) { // ensure the initial info is committed mi_assert_internal(!mem_large && !is_pinned); bool commit_zero = false; - bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld_os); - if (commit_zero) *is_zero = true; + bool ok = _mi_os_commit(segment, pre_size, &commit_zero, tld_os->stats); + if (commit_zero) { *is_zero = true; } if (!ok) { // commit failed; we cannot touch the memory: free the segment directly and return `NULL` - _mi_mem_free(segment, *segment_size, alignment, align_offset, memid, false, false, tld_os); + _mi_arena_free(segment, *segment_size, alignment, align_offset, memid, false, tld_os->stats); return NULL; } } @@ -651,7 +656,7 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); - bool ok = _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + bool ok = _mi_os_commit(start, psize + gsize, &is_zero, tld->stats); if (!ok) return false; // failed to commit! 
if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } if (is_zero) { page->is_zero_init = true; } diff --git a/src/static.c b/src/static.c index 090f0c25..483b1b41 100644 --- a/src/static.c +++ b/src/static.c @@ -31,7 +31,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "os.c" #include "page.c" // includes page-queue.c #include "random.c" -#include "region.c" #include "segment.c" #include "stats.c" #include "prim/prim.c" From 4fc597d4f4680be8e12dc8900400ec5c07983781 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 15:05:29 -0700 Subject: [PATCH 016/102] more tight purge delay --- src/arena.c | 7 ++++--- src/options.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index 724fbaf4..e711c949 100644 --- a/src/arena.c +++ b/src/arena.c @@ -305,9 +305,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset { mi_arena_id_t arena_id = 0; - bool arena_commit = _mi_os_has_overcommit(); - if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } - else if (mi_option_get(mi_option_arena_eager_commit) == 0) { arena_commit = false; } + // commit eagerly? + bool arena_commit = false; + if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } + else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); diff --git a/src/options.c b/src/options.c index 5d4af7ba..580e1887 100644 --- a/src/options.c +++ b/src/options.c @@ -75,7 +75,7 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 10, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 500, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation + { 100, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation { 1, UNINIT, MI_OPTION(allow_purge) } // allow decommit/reset to free (physical) memory back to the OS }; From 461df1e8788a26c76f03307941dce87c9b143ccf Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 15:40:06 -0700 Subject: [PATCH 017/102] implement arena exclusive heap allocation for dev --- include/mimalloc.h | 2 +- include/mimalloc/internal.h | 1 + include/mimalloc/types.h | 1 + src/heap.c | 15 ++++++++-- src/init.c | 2 ++ src/segment.c | 55 +++++++++++++++++++++++++------------ 6 files changed, 54 insertions(+), 22 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 032faa4b..f229270c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -281,7 +281,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -#if MI_MALLOC_VERSION >= 200 +#if MI_MALLOC_VERSION >= 182 // Create a heap that only allocates in the specified arena mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b9fe5453..6e98be41 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -179,6 +179,7 @@ void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); void _mi_heap_destroy_all(void); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 28343d21..434f9f67 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -410,6 +410,7 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") _Atomic(mi_block_t*) thread_delayed_free; mi_threadid_t thread_id; // thread this heap belongs too + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation diff --git a/src/heap.c b/src/heap.c index 99316bb8..08b27f3d 100644 --- a/src/heap.c +++ b/src/heap.c @@ -198,15 +198,16 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
- if (heap==NULL) return NULL; + if (heap == NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); + heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; + heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe @@ -216,6 +217,14 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { return heap; } +mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { + return mi_heap_new_in_arena(_mi_arena_id_none()); +} + +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { + return _mi_arena_memid_is_suitable(memid, heap->arena_id); +} + uintptr_t _mi_heap_random_next(mi_heap_t* heap) { return _mi_random_next(&heap->random); } diff --git a/src/init.c b/src/init.c index 8c79561f..b105548e 100644 --- a/src/init.c +++ b/src/init.c @@ -96,6 +96,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie + 0, // arena id { 0, 0 }, // keys { {0}, {0}, 0, true }, // random 0, // page count @@ -132,6 +133,7 @@ mi_heap_t _mi_heap_main = { MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie + 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0x846ca68b}, {0}, 0, true }, // random 0, // page count diff --git a/src/segment.c b/src/segment.c index af325fe7..458980cd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -54,9 +54,11 @@ static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_ } #endif +/* static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) { return (queue->first == NULL); } +*/ static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_expensive(mi_segment_queue_contains(queue, segment)); @@ -500,7 +502,8 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Segment allocation ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, size_t pre_size, size_t info_size, +static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, mi_arena_id_t req_arena_id, + size_t pre_size, size_t info_size, size_t* segment_size, bool* is_zero, bool* commit, mi_segments_tld_t* tld, mi_os_tld_t* tld_os) { size_t memid; @@ -515,7 +518,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme } // mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); - mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, _mi_arena_id_none(), &memid, tld_os); + mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, tld_os); if (segment == NULL) return NULL; // failed to allocate if (!(*commit)) { // ensure the initial info is committed @@ -541,7 +544,8 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme } // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
-static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, + mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // required is only > 0 for huge page allocations mi_assert_internal((required > 0 && page_kind > MI_PAGE_LARGE)|| (required==0 && page_kind <= MI_PAGE_LARGE)); @@ -574,7 +578,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool is_zero = false; // Allocate the segment from the OS (segment_size can change due to alignment) - mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, pre_size, info_size, &segment_size, &is_zero, &commit, tld, os_tld); + mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, &segment_size, &is_zero, &commit, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment->mem_is_pinned ? segment->mem_is_committed : true); @@ -1094,6 +1098,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; + // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments + // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? + bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); bool all_pages_free; bool has_page = mi_segment_check_free(segment,block_size,&all_pages_free); // try to free up pages (due to concurrent frees) if (all_pages_free) { @@ -1104,18 +1111,19 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (has_page && segment->page_kind == page_kind) { + else if (has_page && segment->page_kind == page_kind && is_suitable) { // found a free page of the right kind, or page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } - else if (segment->abandoned_visits >= 3) { + else if (segment->abandoned_visits >= 3 && is_suitable) { // always reclaim on 3rd visit to limit the list length. mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { // otherwise, push on the visited list so it gets not looked at too quickly again + // todo: reset delayed pages in the segment? mi_abandoned_visited_push(segment); } } @@ -1135,6 +1143,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s // 1. 
try to reclaim an abandoned segment bool reclaimed; mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); + mi_assert_internal(segment == NULL || _mi_arena_memid_is_suitable(segment->memid, heap->arena_id)); if (reclaimed) { // reclaimed the right page right into the heap mi_assert_internal(segment != NULL && segment->page_kind == page_kind && page_kind <= MI_PAGE_LARGE); @@ -1145,7 +1154,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s return segment; } // 2. otherwise allocate a fresh segment - return mi_segment_alloc(0, page_kind, page_shift, 0, tld, os_tld); + return mi_segment_alloc(0, page_kind, page_shift, 0, heap->arena_id, tld, os_tld); } @@ -1155,7 +1164,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_assert_expensive(mi_segment_is_valid(segment, tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search? mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { @@ -1173,24 +1182,34 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl return mi_segment_find_free(segment, tld); } -static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +static mi_page_t* mi_segment_page_try_alloc_in_queue(mi_heap_t* heap, mi_page_kind_t kind, mi_segments_tld_t* tld) { // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); - if (mi_segment_queue_is_empty(free_queue)) { + for (mi_segment_t* segment = free_queue->first; segment != NULL; segment = segment->next) { + if (_mi_arena_memid_is_suitable(segment->memid, heap->arena_id) && mi_segment_has_free(segment)) { + return mi_segment_page_alloc_in(segment, tld); + } + } + return NULL; +} + +static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_page_t* page = mi_segment_page_try_alloc_in_queue(heap, kind, tld); + if (page == NULL) { // possibly allocate or reclaim a fresh segment mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld); if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed) mi_assert_internal(free_queue->first == segment); mi_assert_internal(segment->page_kind==kind); mi_assert_internal(segment->used < segment->capacity); + mi_assert_internal(_mi_arena_memid_is_suitable(segment->memid, heap->arena_id)); + page = mi_segment_page_try_alloc_in_queue(heap, kind, tld); // this should now succeed } - mi_assert_internal(free_queue->first != NULL); - mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld); mi_assert_internal(page != NULL); -#if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN + #if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN // verify it is committed _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; -#endif + #endif return page; } @@ -1217,9 +1236,9 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size return page; } -static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t 
page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, tld, os_tld); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, req_arena_id, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); #if MI_HUGE_PAGE_ABANDON @@ -1303,7 +1322,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0); if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; } - page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld, os_tld); } else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); @@ -1315,7 +1334,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld, os_tld); } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); From a2e1d2b89321f16adc0cdb3222d6ff88ef4d7eb4 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 16:00:17 -0700 Subject: [PATCH 018/102] add segment map to track valid pointers --- CMakeLists.txt | 1 + include/mimalloc/internal.h | 21 ++--- src/arena.c | 3 - src/segment-map.c | 153 ++++++++++++++++++++++++++++++++++++ src/segment.c | 7 +- src/static.c | 1 + 6 files changed, 166 insertions(+), 20 deletions(-) create mode 100644 src/segment-map.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a9e098c7..2bcd1ef7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ set(mi_sources src/page.c src/random.c src/segment.c + src/segment-map.c src/stats.c src/prim/prim.c) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 6e98be41..c776d985 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -121,20 +121,9 @@ bool _mi_arena_is_os_allocated(size_t arena_memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); -/* -// memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld); -bool _mi_mem_protect(void* addr, size_t size); -bool _mi_mem_unprotect(void* addr, size_t size); - -void _mi_mem_collect(mi_os_tld_t* tld); -*/ +// "segment-map.c" +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -460,6 +449,10 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } +// size of a segment +static inline size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_size; +} // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { diff --git a/src/arena.c b/src/arena.c index e711c949..14dd8b4d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -574,9 +574,6 @@ bool _mi_arena_contains(const void* p) { return false; } -mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return _mi_arena_contains(p); // todo: extend to track os allocated memory as well -} /* ----------------------------------------------------------- Add an arena. diff --git a/src/segment-map.c b/src/segment-map.c new file mode 100644 index 00000000..56b18531 --- /dev/null +++ b/src/segment-map.c @@ -0,0 +1,153 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) + set to 1 if it contains the segment meta data. +----------------------------------------------------------- */ +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas) +#else +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? + if ((uintptr_t)segment >= MI_MAX_ADDRESS) { + *bitidx = 0; + return MI_SEGMENT_MAP_WSIZE; + } + else { + const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + *bitidx = segindex % MI_INTPTR_BITS; + const size_t mapindex = segindex / MI_INTPTR_BITS; + mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); + return mapindex; + } +} + +void _mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index==MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +void _mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index == MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask & ~((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. +static mi_segment_t* _mi_segment_of(const void* p) { + if (p == NULL) return NULL; + mi_segment_t* segment = _mi_ptr_segment(p); + mi_assert_internal(segment != NULL); + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { + return segment; // yes, allocated by us + } + if (index==MI_SEGMENT_MAP_WSIZE) return NULL; + + // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? 
+ + // search downwards for the first segment in case it is an interior pointer + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // valid huge objects + // note: we could maintain a lowest index to speed up the path for invalid pointers? + size_t lobitidx; + size_t loindex; + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); + if (lobits != 0) { + loindex = index; + lobitidx = mi_bsr(lobits); // lobits != 0 + } + else if (index == 0) { + return NULL; + } + else { + mi_assert_internal(index > 0); + uintptr_t lomask = mask; + loindex = index; + do { + loindex--; + lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); + } while (lomask != 0 && loindex > 0); + if (lomask == 0) return NULL; + lobitidx = mi_bsr(lomask); // lomask != 0 + } + mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); + // take difference as the addresses could be larger than the MAX_ADDRESS space. + size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; + segment = (mi_segment_t*)((uint8_t*)segment - diff); + + if (segment == NULL) return NULL; + mi_assert_internal((void*)segment < p); + bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(cookie_ok); + if mi_unlikely(!cookie_ok) return NULL; + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); + return segment; +} + +// Is this a valid pointer in our heap? +static bool mi_is_valid_pointer(const void* p) { + return (_mi_segment_of(p) != NULL); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* +// Return the full segment range belonging to a pointer +static void* mi_segment_range_of(const void* p, size_t* size) { + mi_segment_t* segment = _mi_segment_of(p); + if (segment == NULL) { + if (size != NULL) *size = 0; + return NULL; + } + else { + if (size != NULL) *size = segment->segment_size; + return segment; + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); + return page; +} +*/ diff --git a/src/segment.c b/src/segment.c index 458980cd..e34cb2bf 100644 --- a/src/segment.c +++ b/src/segment.c @@ -424,7 +424,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +static size_t mi_segment_calculate_sizes(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; size_t guardsize = 0; @@ -466,6 +466,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { segment->thread_id = 0; + _mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { mi_assert_internal(!segment->mem_is_pinned); @@ -540,6 +541,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme segment->mem_alignment = alignment; segment->mem_align_offset = align_offset; mi_segments_track_size((long)(*segment_size), tld); + _mi_segment_map_allocated_at(segment); return segment; } @@ -565,7 +567,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } size_t info_size; size_t pre_size; - size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); + size_t segment_size = mi_segment_calculate_sizes(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); // Initialize parameters @@ -1199,7 +1201,6 @@ static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_p // possibly allocate or reclaim a fresh segment mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld); if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed) - mi_assert_internal(free_queue->first == segment); mi_assert_internal(segment->page_kind==kind); mi_assert_internal(segment->used < segment->capacity); mi_assert_internal(_mi_arena_memid_is_suitable(segment->memid, heap->arena_id)); diff --git a/src/static.c b/src/static.c index 483b1b41..bc05dd72 100644 --- a/src/static.c +++ b/src/static.c @@ -32,6 +32,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "page.c" // includes page-queue.c #include "random.c" #include "segment.c" +#include "segment-map.c" #include "stats.c" #include "prim/prim.c" #if MI_OSX_ZONE From db74fc0c986fb3a9e88bf8d439adb49db5ea2e95 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 16:04:17 -0700 Subject: [PATCH 019/102] add abandoned reader barrier --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index e34cb2bf..0eec0727 100644 --- a/src/segment.c +++ b/src/segment.c @@ -484,6 +484,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se fully_committed = false; } + _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged) _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, fully_committed, tld->stats); } From d01017ffdae6e25200583eaec3d0803b499c024d Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 16:32:39 -0700 Subject: [PATCH 020/102] extend primitive api with is_zero parameters --- ide/vs2022/mimalloc-override.vcxproj | 1 + ide/vs2022/mimalloc.vcxproj | 1 + include/mimalloc/internal.h | 12 +++--- include/mimalloc/prim.h | 9 +++-- src/arena.c | 17 +++++---- src/init.c | 4 +- src/os.c | 56 +++++++++++++++------------- src/prim/unix/prim.c | 11 ++++-- src/prim/wasi/prim.c | 6 ++- src/prim/windows/prim.c | 6 ++- 10 files changed, 71 insertions(+), 52 deletions(-) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 81a3fc70..e2c7f71d 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -257,6 +257,7 @@ + diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index d7e147b8..77a1711b 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -235,6 +235,7 @@ + diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index c776d985..9b73c92c 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -86,9 +86,9 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea void _mi_thread_done(mi_heap_t* heap); // os.c -void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats); // to allocate thread local data +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); @@ -100,15 +100,15 @@ bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* stats); +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats); void 
_mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, bool* is_zero); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); // arena.c diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index b900cc95..f07bb4bd 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file // Each OS/host needs to implement these primitives, see `src/prim` // for implementations on Window, macOS, WASI, and Linux/Unix. // -// note: on all primitive functions, we always get: +// note: on all primitive functions, we always have result parameters != NUL, and: // addr != NULL and page aligned // size > 0 and page aligned // return value is an error code an int where 0 is success. @@ -39,9 +39,10 @@ int _mi_prim_free(void* addr, size_t size ); // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. // If `commit` is false, the virtual memory range only needs to be reserved (with no access) // which will later be committed explicitly using `_mi_prim_commit`. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) // pre: !commit => !allow_large // try_alignment >= _mi_os_page_size() and a power of 2 -int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr); +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); // Commit memory. Returns error code or 0 on success. // For example, on Linux this would make the memory PROT_READ|PROT_WRITE. @@ -61,10 +62,10 @@ int _mi_prim_reset(void* addr, size_t size); int _mi_prim_protect(void* addr, size_t size, bool protect); // Allocate huge (1GiB) pages possibly associated with a NUMA node. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) // pre: size > 0 and a multiple of 1GiB. -// addr is either NULL or an address hint. // numa_node is either negative (don't care), or a numa node number. 
-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr); +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); // Return the current NUMA node size_t _mi_prim_numa_node(void); diff --git a/src/arena.c b/src/arena.c index 14dd8b4d..5a3dfb91 100644 --- a/src/arena.c +++ b/src/arena.c @@ -322,9 +322,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset errno = ENOMEM; return NULL; } - *is_zero = true; + *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); + void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, is_zero, tld->stats); if (p != NULL) { *is_pinned = *large; } return p; } @@ -612,8 +612,9 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); const size_t bitmaps = (allow_decommit ? 4 : 2); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, NULL, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; + _mi_memzero(arena, asize); // already zero'd due to os_alloc // _mi_memzero(arena, asize); @@ -654,9 +655,10 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block bool large = allow_large; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); + bool is_zero; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &is_zero, &_mi_stats_main); if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) { + if (!mi_manage_os_memory_ex(start, size, (large || commit), large, is_zero, -1, exclusive, arena_id)) { _mi_os_free_ex(start, size, commit, &_mi_stats_main); _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); return ENOMEM; @@ -718,14 +720,15 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); + bool is_zero = false; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &is_zero); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); return ENOMEM; } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) { + if (!mi_manage_os_memory_ex(p, hsize, true, true, is_zero, numa_node, exclusive, arena_id)) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } diff --git a/src/init.c b/src/init.c index b105548e..177d3034 100644 --- a/src/init.c +++ b/src/init.c @@ -200,10 +200,10 @@ static mi_thread_data_t* mi_thread_data_alloc(void) { } } // if that fails, allocate directly from the OS - td = 
(mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), NULL, &_mi_stats_main); if (td == NULL) { // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), NULL, &_mi_stats_main); if (td == NULL) { // really out of memory _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); diff --git a/src/os.c b/src/os.c index 78a7b5f7..e639c751 100644 --- a/src/os.c +++ b/src/os.c @@ -171,27 +171,20 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { -------------------------------------------------------------- */ // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. -static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { +static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(is_large != NULL); if (size == 0) return NULL; if (!commit) allow_large = false; if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning + *is_zero = false; void* p = NULL; - int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p); + int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p); if (err != 0) { _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); } - /* - if (commit && allow_large) { - p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); - if (p != NULL) { - *is_large = true; - return p; - } - } - */ - mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -203,16 +196,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. 
-static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { +static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_large != NULL); + mi_assert_internal(is_zero != NULL); if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); + void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; // if not aligned, free it, overallocate, and unmap around it @@ -224,7 +218,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block // over-allocate uncommitted (virtual) memory - p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); + p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); if (p == NULL) return NULL; // set p to the aligned part in the full region @@ -239,7 +233,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, } else { // mmap can free inside an allocation // overallocate... - p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); + p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, is_zero, stats); if (p == NULL) return NULL; // and selectively unmap parts around the over-allocated area. 
(noop on sbrk) void* aligned_p = mi_align_up_ptr(p, alignment); @@ -263,16 +257,19 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, OS API: alloc and alloc_aligned ----------------------------------------------------------- */ -void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { +void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); bool is_large = false; - return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); + bool is_zerox = false; + void* p = mi_os_mem_alloc(size, 0, true, false, &is_large, &is_zerox, stats); + if (is_zero != NULL) { *is_zero = is_zerox; } + return p; } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); @@ -284,7 +281,12 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); + bool is_largex = false; + bool is_zerox = false; + void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &is_largex, &is_zerox, &_mi_stats_main /*tld->stats*/ ); + if (large != NULL) { *large = is_largex; } + if (is_zero != NULL) { *is_zero = is_zerox; } + return p; } /* ----------------------------------------------------------- @@ -295,20 +297,20 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar to use the actual start of the memory region. 
----------------------------------------------------------- */ -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) { +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats) { mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); if (offset > MI_SEGMENT_SIZE) return NULL; if (offset == 0) { // regular aligned allocation - return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats); + return _mi_os_alloc_aligned(size, alignment, commit, large, is_zero, tld_stats); } else { // overallocate to align at an offset const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t oversize = size + extra; - void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats); + void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, is_zero, tld_stats); if (start == NULL) return NULL; void* p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); @@ -521,7 +523,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { #endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, bool* is_zero) { if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; @@ -533,11 +535,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // or to at least allocate as many as available on the system. mi_msecs_t start_t = _mi_clock_start(); size_t page = 0; + bool all_zero = true; while (page < pages) { // allocate a page + bool is_zerox = false; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = NULL; - int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p); + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zerox, &p); + if (!is_zerox) { all_zero = false; } if (err != 0) { _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); break; @@ -576,6 +581,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) { *pages_reserved = page; } if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } + if (is_zero != NULL) { *is_zero = all_zero; } return (page == 0 ? NULL : start); } diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index f9aa3b7c..eec6ca6d 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -313,12 +313,13 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + *is_zero = true; + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); return (*addr != NULL ? 0 : errno); } @@ -417,8 +418,9 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co } #endif -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { bool is_large = true; + *is_zero = true; *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes unsigned long numa_mask = (1UL << numa_node); @@ -436,8 +438,9 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, vo #else -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = true; *addr = NULL; return ENOMEM; } diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index 57d1c690..3f2659dd 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -114,9 +114,10 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. -int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { MI_UNUSED(allow_large); MI_UNUSED(commit); *is_large = false; + *is_zero = false; *addr = mi_prim_mem_grow(size, try_alignment); return (*addr != NULL ? 
0 : ENOMEM); } @@ -152,8 +153,9 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) { // Huge pages and NUMA nodes //--------------------------------------------- -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = true; *addr = NULL; return ENOSYS; } diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 467a42e9..514fe647 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -239,10 +239,11 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW return p; } -int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); + *is_zero = true; int flags = MEM_RESERVE; if (commit) { flags |= MEM_COMMIT; } *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); @@ -331,7 +332,8 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); } -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + *is_zero = true; *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); return (*addr != NULL ? 0 : (int)GetLastError()); } From 9f0da5c1951aec25c432dff013c16e4f09244efd Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 16:48:02 -0700 Subject: [PATCH 021/102] merge ide files --- ide/vs2022/mimalloc-override.vcxproj | 3 --- ide/vs2022/mimalloc.vcxproj | 3 --- 2 files changed, 6 deletions(-) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 5c0513c6..52ed5282 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -257,11 +257,8 @@ -<<<<<<< HEAD -======= ->>>>>>> dev-reset diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 36100a79..33a719c1 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -235,11 +235,8 @@ -<<<<<<< HEAD -======= ->>>>>>> dev-reset From 8d56c155f9f34dfc25710a9683c6a5ef8c15a9e3 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 16:58:58 -0700 Subject: [PATCH 022/102] set page_free to 0 by default --- src/options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/options.c b/src/options.c index 580e1887..b9794dcb 100644 --- a/src/options.c +++ b/src/options.c @@ -60,14 +60,14 @@ static mi_option_desc_t options[_mi_option_last] = // the following options are experimental and not all combinations make sense. 
{ 1, UNINIT, MI_OPTION_LEGACY(segment_eager_commit,eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) - { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, - { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux) + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit) { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) #if defined(__NetBSD__) From cb0369452d93054d89008f3b3edb6254207a2d13 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 17:54:33 -0700 Subject: [PATCH 023/102] use exponetially sized arenas --- include/mimalloc/internal.h | 1 + include/mimalloc/prim.h | 11 ++++--- src/arena.c | 62 +++++++++++++++++++++++-------------- src/os.c | 8 ++++- src/prim/unix/prim.c | 1 + src/prim/wasi/prim.c | 1 + src/prim/windows/prim.c | 1 + 7 files changed, 56 insertions(+), 29 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 9b73c92c..155fd862 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -92,6 +92,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to f size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index f07bb4bd..094d7ab9 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -22,11 +22,12 @@ terms of the MIT license. A copy of the license can be found in the file // OS memory configuration typedef struct mi_os_mem_config_s { - size_t page_size; // 4KiB - size_t large_page_size; // 2MiB - size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) - bool has_overcommit; // can we reserve more memory than can be actually committed? - bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc) + size_t page_size; // 4KiB + size_t large_page_size; // 2MiB + size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) + bool has_overcommit; // can we reserve more memory than can be actually committed? + bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc) + bool has_virtual_reserve; // has virtual reserve? 
(if true we can reserve virtual address space without using commit or physical memory) } mi_os_mem_config_t; // Initialize diff --git a/src/arena.c b/src/arena.c index 5a3dfb91..6ad9a5a1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,7 +1,5 @@ - - /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2022, Microsoft Research, Daan Leijen +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -25,7 +23,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo #include "mimalloc/atomic.h" #include // memset -#include // ENOMEM +#include // ENOMEM #include "bitmap.h" // atomic bitmap @@ -38,7 +36,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo typedef uintptr_t mi_block_info_t; #define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB -#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) +#define MI_MAX_ARENAS (112) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) // A memory arena descriptor typedef struct mi_arena_s { @@ -277,6 +275,35 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size return NULL; } +// try to reserve a fresh arena +static bool mi_arena_reserve(size_t size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) +{ + if (_mi_preloading()) return false; + if (req_arena_id != _mi_arena_id_none()) return false; + + const size_t arena_count = mi_atomic_load_relaxed(&mi_arena_count); + if (arena_count > (MI_MAX_ARENAS - 4)) return false; + + size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); + if (arena_reserve == 0) return false; + + if (!_mi_os_has_virtual_reserve()) { + arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example) + } + arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); + if (arena_count >= 8 && arena_count <= 128) { + arena_reserve = (1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially + } + if (arena_reserve < size) return false; + + // commit eagerly? + bool arena_commit = false; + if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } + else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + + return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0); +} + void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { @@ -296,24 +323,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset if (p != NULL) return p; // otherwise, try to first eagerly reserve a new arena - size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); - arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); - if (arena_reserve > 0 && arena_reserve >= size && // eager reserve enabled and large enough? - req_arena_id == _mi_arena_id_none() && // not exclusive? 
- mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) && // not too many arenas already? - !_mi_preloading() ) // and not before main runs - { - mi_arena_id_t arena_id = 0; - - // commit eagerly? - bool arena_commit = false; - if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } - else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } - - if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { - p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - if (p != NULL) return p; - } + mi_arena_id_t arena_id = 0; + if (mi_arena_reserve(size,*large,req_arena_id,&arena_id)) { + // and try allocate in there + p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + if (p != NULL) return p; } } @@ -334,6 +348,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, b return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); } + void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (size != NULL) *size = 0; size_t arena_index = mi_arena_id_index(arena_id); @@ -344,6 +359,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { return arena->start; } + /* ----------------------------------------------------------- Arena purge ----------------------------------------------------------- */ diff --git a/src/os.c b/src/os.c index e639c751..4710b809 100644 --- a/src/os.c +++ b/src/os.c @@ -21,13 +21,19 @@ static mi_os_mem_config_t mi_os_mem_config = { 0, // large page size (usually 2MiB) 4096, // allocation granularity true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) - false // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) + false, // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) + true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory) }; bool _mi_os_has_overcommit(void) { return mi_os_mem_config.has_overcommit; } +bool _mi_os_has_virtual_reserve(void) { + return mi_os_mem_config.has_virtual_reserve; +} + + // OS (small) page size size_t _mi_os_page_size(void) { return mi_os_mem_config.page_size; diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index eec6ca6d..e3a6f8a9 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -134,6 +134,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? config->has_overcommit = unix_detect_overcommit(); config->must_free_whole = false; // mmap can free in parts + config->has_virtual_reserve = true; // todo: check if this true for NetBSD? 
(for anonymous mmap with PROT_NONE) } diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index 3f2659dd..bf78a258 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -21,6 +21,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->alloc_granularity = 16; config->has_overcommit = false; config->must_free_whole = true; + config->has_virtual_reserve = false; } //--------------------------------------------- diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 514fe647..af6af5fe 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -113,6 +113,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->has_overcommit = false; config->must_free_whole = true; + config->has_virtual_reserve = true; // get the page size SYSTEM_INFO si; GetSystemInfo(&si); From 4c4f2f4084a7c42c595e8f95fefee904a40c5ea1 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 18:13:35 -0700 Subject: [PATCH 024/102] clean up arena function names --- src/arena.c | 42 ++++++++++++++++++++++-------------------- src/segment.c | 2 +- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/arena.c b/src/arena.c index 6ad9a5a1..9eedcfdb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -133,7 +133,9 @@ static size_t mi_arena_block_size(size_t bcount) { /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) + +// claim the `blocks_inuse` bits +static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { @@ -148,7 +150,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, +static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { @@ -157,7 +159,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; mi_bitmap_index_t bitmap_index; - if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; + if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! 
void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); @@ -199,9 +201,9 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren } // allocate in a speficic arena -static void* mi_arena_alloc_in(mi_arena_id_t arena_id, int numa_node, size_t size, size_t alignment, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) +static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, int numa_node, size_t size, size_t alignment, + bool* commit, bool* large, bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) { MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); @@ -216,12 +218,12 @@ static void* mi_arena_alloc_in(mi_arena_id_t arena_id, int numa_node, size_t siz if (arena == NULL) return NULL; if (arena->numa_node >= 0 && arena->numa_node != numa_node) return NULL; if (!(*large) && arena->is_large) return NULL; - return mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + return mi_arena_alloc_at(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); } // allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, +static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) { @@ -233,14 +235,14 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size mi_assert_internal(size <= mi_arena_block_size(bcount)); size_t arena_index = mi_arena_id_index(req_arena_id); - if (arena_index < MI_MAX_ARENAS) { + if (arena_index < MI_MAX_ARENAS && arena_index < max_arena) { // try a specific arena if requested mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); if ((arena != NULL) && - (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + // (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { - void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arena_alloc_at(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -251,9 +253,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena == NULL) break; // end reached if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? 
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -264,9 +266,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena == NULL) break; // end reached if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -276,7 +278,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size } // try to reserve a fresh arena -static bool mi_arena_reserve(size_t size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) +static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) { if (_mi_preloading()) return false; if (req_arena_id != _mi_arena_id_none()) return false; @@ -294,7 +296,7 @@ static bool mi_arena_reserve(size_t size, bool allow_large, mi_arena_id_t req_ar if (arena_count >= 8 && arena_count <= 128) { arena_reserve = (1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially } - if (arena_reserve < size) return false; + if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size // commit eagerly? 
bool arena_commit = false; @@ -319,14 +321,14 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arenas_alloc(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; // otherwise, try to first eagerly reserve a new arena mi_arena_id_t arena_id = 0; if (mi_arena_reserve(size,*large,req_arena_id,&arena_id)) { // and try allocate in there - p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + p = mi_arena_alloc_at_id(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; } } diff --git a/src/segment.c b/src/segment.c index 0eec0727..bfd2b75b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -575,7 +575,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && // don't delay for large objects // !_mi_os_has_overcommit() && // never delay on overcommit systems _mi_current_thread_count() > 1 && // do not delay for the first N threads - tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool is_zero = false; From 83aa63548561d3b17f77ed7c94d6945341129597 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 18:44:08 -0700 Subject: [PATCH 025/102] implement arena destroy on program exit --- src/arena.c | 100 +++++++++++++++++++++++++++++++++++++++++----------- src/heap.c | 2 +- src/init.c | 2 +- 3 files changed, 82 insertions(+), 22 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9eedcfdb..e8c7418e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,13 +42,16 @@ typedef uintptr_t mi_block_info_t; typedef struct mi_arena_s { mi_arena_id_t id; // arena id; 0 for non-specific bool exclusive; // only allow allocations if specifically for this arena + bool owned; // if true, the arena will be released when the process exits if `mi_option_destroy_on_exit` is set. _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) + size_t meta_size; // size of the arena structure itself including the bitmaps int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL bool is_large; // large- or huge OS pages (always committed) + bool is_huge_alloc; // huge OS pages allocated by `_mi_os_alloc_huge_pages` _Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? 
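The new `owned` flag distinguishes arenas whose backing memory mimalloc reserved itself from arenas that merely manage memory handed in by the application: only owned arenas are released again by `mi_arenas_destroy` when `mi_option_destroy_on_exit` is enabled. A minimal sketch of the two cases using the public API (the 256 MiB reservation and the 1 MiB allocation are illustrative sizes only):

  #include <mimalloc.h>

  int main(void) {
    // Release owned arenas (and other OS memory) on process exit; mainly useful
    // when mimalloc is part of a dynamic library that is unloaded before exit.
    mi_option_set_enabled(mi_option_destroy_on_exit, true);

    // Reserved by mimalloc itself: the arena is created with owned=true,
    // so mi_arenas_destroy unmaps this memory when the process terminates.
    mi_arena_id_t reserved_id;
    if (mi_reserve_os_memory_ex(256 * 1024 * 1024, false /* commit */,
                                false /* allow large */, false /* exclusive */,
                                &reserved_id) != 0) return 1;

    // By contrast, a range registered through mi_manage_os_memory_ex() keeps
    // owned=false: mimalloc allocates from it but never frees or unmaps it,
    // so the application stays responsible for it even with destroy_on_exit.

    void* p = mi_malloc(1 << 20);   // may now be served from the reserved arena
    mi_free(p);
    return 0;
  }
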
@@ -63,6 +66,8 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 +static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept; + /* ----------------------------------------------------------- Arena id's 0 is used for non-arena's (like OS memory) @@ -130,6 +135,12 @@ static size_t mi_arena_block_size(size_t bcount) { return (bcount * MI_ARENA_BLOCK_SIZE); } +static size_t mi_arena_size(mi_arena_t* arena) { + return mi_arena_block_size(arena->block_count); +} + + + /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ @@ -251,8 +262,8 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t // try numa affine allocation for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; // end reached - if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? + if (arena != NULL && + (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); @@ -264,8 +275,8 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t // try from another numa node instead.. for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; // end reached - if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! + if (arena != NULL && + (arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); @@ -294,7 +305,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re } arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); if (arena_count >= 8 && arena_count <= 128) { - arena_reserve = (1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially + arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially } if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size @@ -499,9 +510,11 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) size_t max_purge_count = (visit_all ? 
max_arena : 1); for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (mi_arena_try_purge(arena, now, force, stats)) { - if (max_purge_count <= 1) break; - max_purge_count--; + if (arena != NULL) { + if (mi_arena_try_purge(arena, now, force, stats)) { + if (max_purge_count <= 1) break; + max_purge_count--; + } } } } @@ -574,10 +587,41 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, mi_arenas_try_purge(false, false, stats); } +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +static void mi_arenas_destroy(void) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + size_t new_max_arena = 0; + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena != NULL) { + if (arena->owned && arena->start != NULL) { + mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); + if (arena->is_huge_alloc) { + _mi_os_free_huge_pages(arena->start, mi_arena_size(arena), &_mi_stats_main); + } + else { + _mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main); + } + _mi_os_free(arena, arena->meta_size, &_mi_stats_main); + } + else { + new_max_arena = i; + } + } + } + + // try to lower the max arena. + size_t expected = max_arena; + mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); +} + void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats) { - MI_UNUSED(free_arenas); // todo - mi_arenas_try_purge(force_decommit, true, stats); + if (free_arenas) { + mi_arenas_destroy(); + } + mi_arenas_try_purge(force_decommit, true, stats); } @@ -585,7 +629,7 @@ bool _mi_arena_contains(const void* p) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { + if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { return true; } } @@ -601,20 +645,20 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - if (arena_id != NULL) *arena_id = -1; + if (arena_id != NULL) { *arena_id = -1; } size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { mi_atomic_decrement_acq_rel(&mi_arena_count); return false; } - mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); arena->id = mi_arena_id_create(i); - if (arena_id != NULL) *arena_id = arena->id; + mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); + if (arena_id != NULL) { *arena_id = arena->id; } return true; } -bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = 
_mi_arena_id_none(); if (size < MI_ARENA_BLOCK_SIZE) return false; @@ -638,11 +682,14 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); arena->exclusive = exclusive; + arena->owned = owned; + arena->meta_size = asize; arena->block_count = bcount; arena->field_count = fields; arena->start = (uint8_t*)start; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = is_large; + arena->is_huge_alloc= is_huge_alloc; arena->is_zero_init = is_zero; arena->allow_decommit = allow_decommit; arena->purge_expire = 0; @@ -667,8 +714,13 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is } +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + return mi_manage_os_memory_ex2(start,size,is_committed,is_large,false,is_zero,numa_node,exclusive,false /* not owned */, arena_id); +} + + // Reserve a range of regular OS memory -int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept +static int mi_reserve_os_memory_ex2(size_t size, bool commit, bool allow_large, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block @@ -676,7 +728,7 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc bool is_zero; void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &is_zero, &_mi_stats_main); if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory_ex(start, size, (large || commit), large, is_zero, -1, exclusive, arena_id)) { + if (!mi_manage_os_memory_ex2(start, size, (large || commit), large, false, is_zero, -1, exclusive, owned, arena_id)) { _mi_os_free_ex(start, size, commit, &_mi_stats_main); _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); return ENOMEM; @@ -685,10 +737,18 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc return 0; } -bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { - return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false, NULL); +// Reserve a range of regular OS memory +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + return mi_reserve_os_memory_ex2(size,commit,allow_large,exclusive,true /*owned*/, arena_id); } + +// Manage a range of regular OS memory +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { + return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? 
*/, NULL); +} + +// Reserve a range of regular OS memory int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); } @@ -746,7 +806,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex(p, hsize, true, true, is_zero, numa_node, exclusive, arena_id)) { + if (!mi_manage_os_memory_ex2(p, hsize, true, true, true, is_zero, numa_node, exclusive, true /* owned */, arena_id)) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } diff --git a/src/heap.c b/src/heap.c index 08b27f3d..53923cf6 100644 --- a/src/heap.c +++ b/src/heap.c @@ -158,7 +158,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions on program-exit (or shared library unload) if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - _mi_arena_collect(false,true,&heap->tld->stats); + _mi_arena_collect(false /* destroy arenas */, true /* force purge */, &heap->tld->stats); } } diff --git a/src/init.c b/src/init.c index 177d3034..f878789c 100644 --- a/src/init.c +++ b/src/init.c @@ -592,7 +592,7 @@ static void mi_cdecl mi_process_done(void) { // or C-runtime termination code. if (mi_option_is_enabled(mi_option_destroy_on_exit)) { _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_arena_collect(true,true,&_mi_heap_main_get()->tld->stats); + _mi_arena_collect(true /* destroy (owned) arenas */, true /* purge the rest */, &_mi_heap_main_get()->tld->stats); } if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { From 8ddf6f96a8cd2cbe0e3843a85595e8b3b6bf69cf Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 19:03:18 -0700 Subject: [PATCH 026/102] fix purge bug in arenas --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index e8c7418e..304987e5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -423,7 +423,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, bool all_purged = false; while (bitidx < endidx) { size_t count = 0; - while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) == 1) { + while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) { count++; } if (count > 0) { From b6a395b5ecc60a92603b3154fd4c2cd8388cab5d Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 19:18:47 -0700 Subject: [PATCH 027/102] reset stats more early for mstress --- test/test-stress.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index f253000f..c6236b77 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -219,6 +219,10 @@ static void test_leak(void) { #endif int main(int argc, char** argv) { + #ifndef USE_STD_MALLOC + mi_stats_reset(); + #endif + // > mimalloc-test-stress [THREADS] [SCALE] [ITER] if (argc >= 2) { char* end; @@ -241,9 +245,6 @@ int main(int argc, char** argv) { //printf("(reserve huge: %i\n)", res); //bench_start_program(); -#ifndef USE_STD_MALLOC - mi_stats_reset(); -#endif // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. 
srand(0x7feb352d); From 55df1f3a541ae27afa7e5070544f2022ca8e79f4 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Apr 2023 09:48:22 -0700 Subject: [PATCH 028/102] more detailed stats, including purges --- include/mimalloc/types.h | 3 +++ src/init.c | 5 +++-- src/os.c | 3 +++ src/stats.c | 31 ++++++++++++++++++++++--------- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 434f9f67..298a373b 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -487,6 +487,7 @@ typedef struct mi_stats_s { mi_stat_count_t reserved; mi_stat_count_t committed; mi_stat_count_t reset; + mi_stat_count_t purged; mi_stat_count_t page_committed; mi_stat_count_t segments_abandoned; mi_stat_count_t pages_abandoned; @@ -499,6 +500,8 @@ typedef struct mi_stats_s { mi_stat_counter_t pages_extended; mi_stat_counter_t mmap_calls; mi_stat_counter_t commit_calls; + mi_stat_counter_t reset_calls; + mi_stat_counter_t purge_calls; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t normal_count; diff --git a/src/init.c b/src/init.c index f878789c..d47ee14f 100644 --- a/src/init.c +++ b/src/init.c @@ -76,8 +76,9 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ + MI_STAT_COUNT_NULL(), \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() // -------------------------------------------------------- diff --git a/src/os.c b/src/os.c index 4710b809..fed248df 100644 --- a/src/os.c +++ b/src/os.c @@ -422,6 +422,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) _mi_stat_increase(&stats->reset, csize); + _mi_stat_counter_increase(&stats->reset_calls, 1); #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN memset(start, 0, csize); // pretend it is eagerly reset @@ -440,6 +441,8 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { if (!mi_option_is_enabled(mi_option_allow_purge)) return false; + _mi_stat_counter_increase(&stats->purge_calls, 1); + _mi_stat_increase(&stats->purged, size); if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? 
!_mi_preloading()) // don't decommit during preloading (unsafe) diff --git a/src/stats.c b/src/stats.c index 0ab2acd2..448248c7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -96,6 +96,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->reserved, &src->reserved, 1); mi_stat_add(&stats->committed, &src->committed, 1); mi_stat_add(&stats->reset, &src->reset, 1); + mi_stat_add(&stats->purged, &src->purged, 1); mi_stat_add(&stats->page_committed, &src->page_committed, 1); mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); @@ -111,6 +112,8 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); + mi_stat_counter_add(&stats->reset_calls, &src->reset_calls, 1); + mi_stat_counter_add(&stats->purge_calls, &src->purge_calls, 1); mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); @@ -143,7 +146,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* const int64_t pos = (n < 0 ? -n : n); if (pos < base) { if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column - snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); + snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); } } else { @@ -158,7 +161,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix); snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc); } - _mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf); + _mi_fprintf(out, arg, (fmt==NULL ? "%12s" : fmt), buf); } @@ -167,7 +170,7 @@ static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* a } static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { - if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + if (unit==1) _mi_fprintf(out, arg, "%12s"," "); else mi_print_amount(n,0,out,arg); } @@ -182,7 +185,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64 mi_print_count(stat->allocated, unit, out, arg); if (stat->allocated > stat->freed) { _mi_fprintf(out, arg, " "); - _mi_fprintf(out, arg, (notok == NULL ? "not all freed!" : notok)); + _mi_fprintf(out, arg, (notok == NULL ? 
"not all freed" : notok)); _mi_fprintf(out, arg, "\n"); } else { @@ -195,7 +198,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64 mi_print_amount(stat->freed, -1, out, arg); mi_print_amount(stat->current, -1, out, arg); if (unit==-1) { - _mi_fprintf(out, arg, "%22s", ""); + _mi_fprintf(out, arg, "%24s", ""); } else { mi_print_amount(-unit, 1, out, arg); @@ -219,12 +222,19 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t mi_stat_print_ex(stat, msg, unit, out, arg, NULL); } +static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->peak, unit, out, arg); + _mi_fprintf(out, arg, "\n"); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); _mi_fprintf(out, arg, "\n"); } + static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); @@ -234,7 +244,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); + _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); } #if MI_STAT>1 @@ -321,7 +331,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); - mi_stat_print(&stats->reset, "reset", 1, out, arg); + mi_stat_peak_print(&stats->reset, "reset", 1, out, arg ); + mi_stat_peak_print(&stats->purged, "purged", 1, out, arg ); mi_stat_print(&stats->page_committed, "touched", 1, out, arg); mi_stat_print(&stats->segments, "segments", -1, out, arg); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); @@ -332,9 +343,11 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_counter_print(&stats->reset_calls, "resets", out, arg); + mi_stat_counter_print(&stats->purge_calls, "purges", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); + _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); size_t elapsed; size_t user_time; @@ -345,7 +358,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) size_t peak_commit; size_t page_faults; mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); _mi_fprintf(out, arg, "%10s: user: 
%ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); From 96e9e6f5702dc732053790173466cfcf178bbd24 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Apr 2023 10:11:54 -0700 Subject: [PATCH 029/102] clean up option naming --- include/mimalloc.h | 11 ++++++----- src/options.c | 43 +++++++++++++++++++++-------------------- src/os.c | 2 +- src/prim/windows/prim.c | 2 +- src/segment.c | 8 ++++++-- 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index f229270c..94df140d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -319,17 +319,17 @@ typedef enum mi_option_e { mi_option_show_stats, // print statistics on termination mi_option_verbose, // print verbose messages // the following options are experimental (see src/options.h) - mi_option_segment_eager_commit, + mi_option_eager_commit, mi_option_arena_eager_commit, mi_option_purge_decommits, - mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit + mi_option_allow_large_os_pages, // enable large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup mi_option_deprecated_segment_cache, mi_option_page_reset, mi_option_abandoned_page_reset, - mi_option_segment_reset, + mi_option_deprecated_segment_reset, mi_option_eager_commit_delay, mi_option_purge_delay, mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. @@ -342,12 +342,13 @@ typedef enum mi_option_e { mi_option_arena_reserve, mi_option_arena_purge_delay, mi_option_allow_purge, + mi_option_purge_extend_delay, _mi_option_last, // legacy options - mi_option_eager_commit = mi_option_segment_eager_commit, + mi_option_large_os_pages = mi_option_allow_large_os_pages, mi_option_eager_region_commit = mi_option_arena_eager_commit, mi_option_reset_decommits = mi_option_purge_decommits, - mi_option_reset_delay = mi_option_purge_delay + mi_option_reset_delay = mi_option_purge_delay } mi_option_t; diff --git a/src/options.c b/src/options.c index b9794dcb..bb25c940 100644 --- a/src/options.c +++ b/src/options.c @@ -59,38 +59,39 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION_LEGACY(segment_eager_commit,eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. 
linux) { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit) - { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's - { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages - { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N + { 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, - { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates - { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(abandoned_page_reset) }, // reset free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit) #if defined(__NetBSD__) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #else - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 10, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. - { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) - { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output - { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output - { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. - { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + { 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 8, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. 
number of segment reclaims from the abandoned segments per try. + { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! #if (MI_INTPTR_SIZE>4) - { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time + { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time #else { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 100, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation - { 1, UNINIT, MI_OPTION(allow_purge) } // allow decommit/reset to free (physical) memory back to the OS + { 100, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation + { 1, UNINIT, MI_OPTION(allow_purge) }, // allow decommit/reset to free (physical) memory back to the OS + { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index fed248df..d37ab9d8 100644 --- a/src/os.c +++ b/src/os.c @@ -46,7 +46,7 @@ size_t _mi_os_large_page_size(void) { bool _mi_os_use_large_page(size_t size, size_t alignment) { // if we have access, check the size and alignment requirements - if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; + if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false; return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0); } diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index af6af5fe..1544c641 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -143,7 +143,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); FreeLibrary(hDll); } - if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + if (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { win_enable_large_os_pages(&config->large_page_size); } } diff --git a/src/segment.c b/src/segment.c index bfd2b75b..f8379aa4 100644 --- a/src/segment.c +++ b/src/segment.c @@ -275,7 +275,7 @@ static bool mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, // against overflow, we use substraction to check for expiry which work // as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) static void mi_page_reset_set_expire(mi_page_t* page) { - uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_reset_delay); + uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_purge_delay); page->used = expire; } @@ -292,7 +292,7 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen if (!mi_option_is_enabled(mi_option_page_reset)) return; if (segment->mem_is_pinned || page->segment_in_use || !page->is_committed || page->is_reset) return; - if (mi_option_get(mi_option_reset_delay) == 0) { + if (mi_option_get(mi_option_purge_delay) == 0) { // reset immediately? 
mi_page_reset(segment, page, 0, tld); } @@ -480,9 +480,13 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se if (!page->is_committed) { fully_committed = false; } if (page->is_reset) { any_reset = true; } } + // TODO: for now, pages always reset but we can purge instead allowing for pages to be decommitted. + MI_UNUSED(any_reset); + /* if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false; } + */ _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged) _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, fully_committed, tld->stats); From cdefd5b69c2acae1fbdd8c55f0dde2144d959b79 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Apr 2023 11:11:03 -0700 Subject: [PATCH 030/102] fix stats for purging --- include/mimalloc/internal.h | 3 ++- src/arena.c | 37 +++++++++++++++++++++++++++++++++---- src/os.c | 11 +++++++++-- src/segment.c | 16 +++++++++++++--- 4 files changed, 57 insertions(+), 10 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 155fd862..59f81ee1 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -100,6 +100,7 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* stats); void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats); @@ -114,7 +115,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, size_t committed, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id); diff --git a/src/arena.c b/src/arena.c index 304987e5..8d52d652 100644 --- a/src/arena.c +++ b/src/arena.c @@ -384,11 +384,24 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->allow_decommit); const size_t size = mi_arena_block_size(blocks); void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx)); - const bool decommitted = _mi_os_purge(p, size, stats); + bool needs_recommit; + if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { + // all blocks are committed, we can purge freely + needs_recommit = _mi_os_purge(p, size, stats); + } + else { + // some blocks are not committed -- this can happen when a partially committed block is freed + // in `_mi_arena_free` and it is conservatively 
marked as uncommitted but still scheduled for a purge + // we need to ensure we do not try to reset, and also not count the decommit stats (as it was already adjusted) + mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); + _mi_stat_increase(&stats->committed, size); + } + // clear the purged blocks _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap - if (decommitted) { + if (needs_recommit) { _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); } } @@ -525,13 +538,20 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, size_t committed_size, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); + mi_assert_internal(committed_size <= size); if (p==NULL) return; if (size==0) return; + const bool all_committed = (committed_size == size); + if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through + if (!all_committed && committed_size > 0) { + // if partially committed, adjust the committed stats + _mi_stat_decrease(&stats->committed, committed_size); + } _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); } else { @@ -566,10 +586,19 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, else { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); + if (!all_committed) { - // assume the entire range as no longer committed + // mark the entire range as no longer committed (so we recommit the full range when re-using) _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); mi_track_mem_noaccess(p,size); + if (committed_size > 0) { + // if partially committed, adjust the committed stats (is it will be recommitted when re-using) + // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. + _mi_stat_decrease(&stats->committed, committed_size); + } + // note: if not all committed, it may be that the purge will reset/decommit the entire range + // that contains already decommitted parts. Since purge consistently uses reset or decommit that + // works (as we should never reset decommitted parts). } // (delay) purge the entire range mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); diff --git a/src/os.c b/src/os.c index d37ab9d8..a07a28ab 100644 --- a/src/os.c +++ b/src/os.c @@ -438,7 +438,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. 
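(Editor's note: a stand-alone sketch of the statistics bookkeeping introduced above, with simplified stand-in names rather than the actual mimalloc functions. When a partially committed range is freed, only its committed portion is subtracted from the `committed` statistic; the later delayed purge must then decommit without resetting and compensate the statistic, since the adjustment already happened at free time.)

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct stats_s { long committed; } stats_t;

// stand-in for _mi_os_purge_ex with purge-by-decommit enabled: a decommit
// subtracts the full range from the committed statistic
static bool os_purge_ex(stats_t* stats, size_t size, bool allow_reset) {
  (void)allow_reset;
  stats->committed -= (long)size;
  return true;  // memory needs to be recommitted before reuse
}

// free side: only the committed part of a partially committed range is
// subtracted; the range itself is still scheduled for a purge
static void arena_free(stats_t* stats, size_t size, size_t committed_size) {
  if (committed_size < size && committed_size > 0) {
    stats->committed -= (long)committed_size;
  }
}

// purge side: for a range no longer marked fully committed, decommit only
// (no reset) and compensate the statistic, as in the patch above
static void arena_purge(stats_t* stats, size_t size, bool all_blocks_committed) {
  if (all_blocks_committed) {
    os_purge_ex(stats, size, true);
  }
  else {
    os_purge_ex(stats, size, false);
    stats->committed += (long)size;
  }
}

int main(void) {
  stats_t stats = { 100 };
  arena_free(&stats, 64, 32);     // 32 of 64 bytes were committed at free time
  arena_purge(&stats, 64, false); // delayed purge of the same range
  printf("committed: %ld\n", stats.committed);  // 68 == 100 - 32
  return 0;
}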
-bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) { if (!mi_option_is_enabled(mi_option_allow_purge)) return false; _mi_stat_counter_increase(&stats->purge_calls, 1); @@ -452,11 +452,18 @@ bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) return needs_recommit; } else { - _mi_os_reset(p, size, stats); + if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed + _mi_os_reset(p, size, stats); + } return false; // not decommitted } } +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. +bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) { + return _mi_os_purge_ex(p, size, true, stats); +} // Protect a region in memory to be not accessible. diff --git a/src/segment.c b/src/segment.c index f8379aa4..2beaaacb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -373,10 +373,14 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) { Segment size calculations ----------------------------------------------------------- */ +static size_t mi_segment_raw_page_size(const mi_segment_t* segment) { + return (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); +} + // Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) // The raw start is not taking aligned block allocation into consideration. static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); + size_t psize = mi_segment_raw_page_size(segment); uint8_t* p = (uint8_t*)segment + page->segment_idx * psize; if (page->segment_idx == 0) { @@ -475,13 +479,19 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se bool any_reset = false; bool fully_committed = true; + size_t committed = 0; + const size_t page_size = mi_segment_raw_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; + if (page->is_committed) { committed += page_size; } if (!page->is_committed) { fully_committed = false; } if (page->is_reset) { any_reset = true; } } - // TODO: for now, pages always reset but we can purge instead allowing for pages to be decommitted. MI_UNUSED(any_reset); + MI_UNUSED(fully_committed); + mi_assert_internal((fully_committed && committed == segment_size) || (!fully_committed && committed < segment_size)); + + // TODO: for now, pages always reset but we can purge instead allowing for pages to be decommitted. 
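(Editor's note: a stand-alone rendering of the loop just above, with made-up page counts and sizes, showing why the segment can now hand a byte count rather than a single all-or-nothing flag to the arena when it is freed. The names and values are illustrative only.)

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define NPAGES 8

int main(void) {
  // hypothetical per-page commit flags for one segment
  bool is_committed[NPAGES] = { true, true, false, true, false, false, true, true };
  const size_t page_size    = 64 * 1024;        // assumed raw page size
  const size_t segment_size = NPAGES * page_size;

  bool fully_committed = true;
  size_t committed = 0;
  for (size_t i = 0; i < NPAGES; i++) {
    if (is_committed[i]) { committed += page_size; }
    else { fully_committed = false; }
  }
  // the invariant asserted by the patch: the byte count agrees with the flag
  assert((fully_committed && committed == segment_size) ||
         (!fully_committed && committed < segment_size));
  printf("passing %zu of %zu committed bytes to the arena\n", committed, segment_size);
  return 0;
}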
/* if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false; @@ -489,7 +499,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se */ _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged) - _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, fully_committed, tld->stats); + _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, committed, tld->stats); } // called by threads that are terminating to free cached segments From 6bd591d675d3fe9595fcbf82f7c93c4b759cdd97 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Apr 2023 11:57:32 -0700 Subject: [PATCH 031/102] fix tsan errors --- src/alloc.c | 1 + src/arena.c | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index a0d67fa8..b17adf45 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -686,6 +686,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_assert_internal(p!=NULL); // todo: do not track as the usable size is still the same in the free; adjust potential padding? // mi_track_resize(p,size,newsize) + // if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); } return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); diff --git a/src/arena.c b/src/arena.c index 8d52d652..81097235 100644 --- a/src/arena.c +++ b/src/arena.c @@ -53,10 +53,10 @@ typedef struct mi_arena_s { bool is_large; // large- or huge OS pages (always committed) bool is_huge_alloc; // huge OS pages allocated by `_mi_os_alloc_huge_pages` _Atomic(size_t) search_idx; // optimization to start the search for free blocks - _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) - mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -248,7 +248,7 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t size_t arena_index = mi_arena_id_index(req_arena_id); if (arena_index < MI_MAX_ARENAS && arena_index < max_arena) { // try a specific arena if requested - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if ((arena != NULL) && // (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? 
(*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages @@ -261,7 +261,7 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t else { // try numa affine allocation for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL && (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages @@ -274,7 +274,7 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t // try from another numa node instead.. for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL && (arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages @@ -294,7 +294,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re if (_mi_preloading()) return false; if (req_arena_id != _mi_arena_id_none()) return false; - const size_t arena_count = mi_atomic_load_relaxed(&mi_arena_count); + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); if (arena_count > (MI_MAX_ARENAS - 4)) return false; size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); @@ -366,7 +366,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (size != NULL) *size = 0; size_t arena_index = mi_arena_id_index(arena_id); if (arena_index >= MI_MAX_ARENAS) return NULL; - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; if (size != NULL) { *size = mi_arena_block_size(arena->block_count); } return arena->start; @@ -512,7 +512,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) const long delay = mi_option_get(mi_option_arena_purge_delay); if (_mi_preloading() || delay == 0 || !mi_option_is_enabled(mi_option_allow_purge)) return; // nothing will be scheduled - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); if (max_arena == 0) return; // allow only one thread to purge at a time @@ -522,7 +522,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) mi_msecs_t now = _mi_clock_now(); size_t max_purge_count = (visit_all ? 
max_arena : 1); for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL) { if (mi_arena_try_purge(arena, now, force, stats)) { if (max_purge_count <= 1) break; @@ -561,7 +561,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t bitmap_idx; mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); const size_t blocks = mi_block_count_of_size(size); @@ -622,7 +622,7 @@ static void mi_arenas_destroy(void) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); size_t new_max_arena = 0; for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL) { if (arena->owned && arena->start != NULL) { mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); @@ -657,7 +657,7 @@ void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats) bool _mi_arena_contains(const void* p) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { return true; } @@ -730,6 +730,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, if (arena->blocks_committed != NULL && is_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning } + // and claim leftover blocks if needed (so we never allocate there) ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; mi_assert_internal(post >= 0); @@ -738,7 +739,6 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - return mi_arena_add(arena, arena_id); } From bcbcc343ec658382bc6ab59615590c1cdb75ef6e Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Fri, 7 Apr 2023 08:59:43 +0100 Subject: [PATCH 032/102] fix build on freebsd. 
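(Editor's note on the tsan fix above, PATCH 031: the relaxed loads of the `mi_arenas` slots become acquire loads so that readers observe a fully initialized arena once its pointer is published. A minimal stand-alone C11 sketch of that publication pattern, assuming, as the destroy path above suggests, that the writer publishes with a release store; the types and functions here are simplified stand-ins, not mimalloc code.)

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct arena_s { int numa_node; size_t block_count; } arena_t;

static _Atomic(arena_t*) arenas[4];

// writer: initialize the arena completely, then publish it with a release store
static void arena_add(size_t i, int numa_node, size_t block_count) {
  arena_t* a = malloc(sizeof(arena_t));
  if (a == NULL) return;
  a->numa_node = numa_node;
  a->block_count = block_count;
  atomic_store_explicit(&arenas[i], a, memory_order_release);
}

// reader: the acquire load pairs with the release store above, so every field
// written before publication is visible; a relaxed load gives no such
// guarantee, which is what thread sanitizer reports
static arena_t* arena_get(size_t i) {
  return atomic_load_explicit(&arenas[i], memory_order_acquire);
}

int main(void) {
  arena_add(0, -1, 1024);
  arena_t* a = arena_get(0);
  if (a != NULL) { printf("numa %d, %zu blocks\n", a->numa_node, a->block_count); }
  free(a);
  return 0;
}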
close #722 --- src/prim/unix/prim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 011ffa7c..4efc10d3 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -169,7 +169,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { int err = errno; - _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); } if (p!=MAP_FAILED) return p; // fall back to regular mmap From eb62caeda02ad851b0344912bc98b81894b80222 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 8 Apr 2023 17:37:58 -0700 Subject: [PATCH 033/102] allow per page purging instead of reset --- include/mimalloc.h | 9 +- include/mimalloc/types.h | 7 +- src/arena.c | 6 +- src/init.c | 2 +- src/options.c | 6 +- src/page.c | 2 +- src/segment.c | 224 ++++++++++++++++----------------------- 7 files changed, 111 insertions(+), 145 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 94df140d..9cc2afba 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -327,8 +327,8 @@ typedef enum mi_option_e { mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup mi_option_deprecated_segment_cache, - mi_option_page_reset, - mi_option_abandoned_page_reset, + mi_option_deprecated_page_reset, + mi_option_abandoned_page_purge, mi_option_deprecated_segment_reset, mi_option_eager_commit_delay, mi_option_purge_delay, @@ -340,7 +340,7 @@ typedef enum mi_option_e { mi_option_max_segment_reclaim, mi_option_destroy_on_exit, mi_option_arena_reserve, - mi_option_arena_purge_delay, + mi_option_arena_purge_mult, mi_option_allow_purge, mi_option_purge_extend_delay, _mi_option_last, @@ -348,7 +348,8 @@ typedef enum mi_option_e { mi_option_large_os_pages = mi_option_allow_large_os_pages, mi_option_eager_region_commit = mi_option_arena_eager_commit, mi_option_reset_decommits = mi_option_purge_decommits, - mi_option_reset_delay = mi_option_purge_delay + mi_option_reset_delay = mi_option_purge_delay, + mi_option_abandoned_page_reset = mi_option_abandoned_page_purge } mi_option_t; diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 298a373b..ac61faae 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -284,7 +284,6 @@ typedef struct mi_page_s { // "owned" by the segment uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` uint8_t segment_in_use:1; // `true` if the segment allocated this page - uint8_t is_reset:1; // `true` if the page memory was reset uint8_t is_committed:1; // `true` if the page virtual memory is committed uint8_t is_zero_init:1; // `true` if the page was zero initialized @@ -327,10 +326,14 @@ typedef struct mi_segment_s { // memory fields size_t memid; // id for the os-level memory manager bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_large; // `true` if the memory is in OS large or huge pages. 
(`is_pinned` will be true) bool mem_is_committed; // `true` if the whole segment is eagerly committed size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) + bool allow_decommit; + bool allow_purge; + // segment fields _Atomic(struct mi_segment_s*) abandoned_next; struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init` @@ -553,7 +556,7 @@ typedef struct mi_os_tld_s { typedef struct mi_segments_tld_s { mi_segment_queue_t small_free; // queue of segments with free small pages mi_segment_queue_t medium_free; // queue of segments with free medium pages - mi_page_queue_t pages_reset; // queue of freed pages that can be reset + mi_page_queue_t pages_purge; // queue of freed pages that are delay purged size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/arena.c b/src/arena.c index 81097235..530304a7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -411,7 +411,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t mi_assert_internal(arena->blocks_purge != NULL); if (!mi_option_is_enabled(mi_option_allow_purge)) return; - const long delay = mi_option_get(mi_option_arena_purge_delay); + const long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); if (_mi_preloading() || delay == 0) { // decommit directly mi_arena_purge(arena, bitmap_idx, blocks, stats); @@ -501,7 +501,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi } // if not fully purged, make sure to purge again in the future if (!full_purge) { - const long delay = mi_option_get(mi_option_arena_purge_delay); + const long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); mi_msecs_t expected = 0; mi_atomic_cas_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay); } @@ -509,7 +509,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi } static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { - const long delay = mi_option_get(mi_option_arena_purge_delay); + const long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); if (_mi_preloading() || delay == 0 || !mi_option_is_enabled(mi_option_allow_purge)) return; // nothing will be scheduled const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); diff --git a/src/init.c b/src/init.c index d47ee14f..7a768d75 100644 --- a/src/init.c +++ b/src/init.c @@ -14,7 +14,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, + 0, false, false, false, 0, // capacity 0, // reserved capacity { 0 }, // flags diff --git a/src/options.c b/src/options.c index bb25c940..d801f0cd 100644 --- a/src/options.c +++ b/src/options.c @@ -67,8 +67,8 @@ static mi_option_desc_t options[_mi_option_last] = {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION(abandoned_page_reset) }, // reset free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(abandoned_page_purge) }, // purge free page memory when a thread terminates { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit) #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 100, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation + { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's { 1, UNINIT, MI_OPTION(allow_purge) }, // allow decommit/reset to free (physical) memory back to the OS { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, }; diff --git a/src/page.c b/src/page.c index 7da12c3a..b1de1612 100644 --- a/src/page.c +++ b/src/page.c @@ -253,7 +253,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); #endif - mi_assert_internal(!page->is_reset); + // TODO: push on full queue immediately if it is full? 
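(Editor's note on the option rows above: the arena purge delay is no longer its own millisecond setting but a multiplier on the general purge delay. With the defaults shown in this series, the effective value is unchanged; a tiny stand-alone calculation, values taken from the option table:)

#include <stdio.h>

int main(void) {
  // defaults from the option table in this patch series
  long purge_delay      = 10;  // mi_option_purge_delay, in milliseconds
  long arena_purge_mult = 10;  // mi_option_arena_purge_mult
  long arena_purge_delay = purge_delay * arena_purge_mult;
  printf("effective arena purge delay: %ld ms\n", arena_purge_delay);  // 100 ms, as before
  return 0;
}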
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); diff --git a/src/segment.c b/src/segment.c index 2beaaacb..1ba2209d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -126,8 +126,8 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) { #endif #if (MI_DEBUG>=2) -static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { - mi_page_t* p = tld->pages_reset.first; +static bool mi_pages_purge_contains(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_t* p = tld->pages_purge.first; while (p != NULL) { if (p == page) return true; p = p->next; @@ -148,8 +148,8 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* if (!page->segment_in_use) { nfree++; } - if (page->segment_in_use || page->is_reset) { - mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + if (page->segment_in_use) { + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); } } mi_assert_internal(nfree + segment->used == segment->capacity); @@ -163,12 +163,12 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page != NULL); if (page->next != NULL || page->prev != NULL) { - mi_assert_internal(mi_pages_reset_contains(page, tld)); + mi_assert_internal(mi_pages_purge_contains(page, tld)); return false; } else { // both next and prev are NULL, check for singleton list - return (tld->pages_reset.first != page && tld->pages_reset.last != page); + return (tld->pages_purge.first != page && tld->pages_purge.last != page); } } @@ -231,37 +231,37 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* Page reset ----------------------------------------------------------- */ -static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { +static void mi_page_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + // todo: should we purge the guard page as well when MI_SECURE>=2 ? mi_assert_internal(page->is_committed); - if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_pinned || page->segment_in_use || !page->is_committed || page->is_reset) return; + mi_assert_internal(!page->segment_in_use); + if (!segment->allow_purge) return; + mi_assert_internal(page->used == 0); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); size_t psize; - void* start = mi_segment_raw_page_start(segment, page, &psize); - page->is_reset = true; - mi_assert_internal(size <= psize); - size_t reset_size = ((size == 0 || size > psize) ? 
psize : size); - if (reset_size > 0) { _mi_os_reset(start, reset_size, tld->stats); } + void* start = mi_segment_raw_page_start(segment, page, &psize); + const bool needs_recommit = _mi_os_purge(start, psize, tld->stats); + if (needs_recommit) { page->is_committed = false; } + page->used = 0; } -static bool mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) -{ - MI_UNUSED(size); MI_UNUSED(tld); - mi_assert_internal(page->is_reset); - mi_assert_internal(page->is_committed); - mi_assert_internal(!segment->mem_is_pinned); - if (segment->mem_is_pinned || !page->is_committed || !page->is_reset) return true; - page->is_reset = false; - /* +static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + if (page->is_committed) return true; + mi_assert_internal(segment->allow_decommit); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); + size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - size_t unreset_size = (size == 0 || size > psize ? psize : size); - */ - // bool is_zero = false; - // bool ok = true; - // if (unreset_size > 0) { - // ok = _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); - // } - // if (is_zero) page->is_zero_init = true; + bool is_zero = false; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + bool ok = _mi_os_commit(start, psize + gsize, &is_zero, tld->stats); + if (!ok) return false; // failed to commit! + page->is_committed = true; + page->used = 0; + page->is_zero_init = is_zero; + if (gsize > 0) { + mi_segment_protect_range(start + psize, gsize, true); + } return true; } @@ -274,33 +274,33 @@ static bool mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, // a 32-bit field while the clock is always 64-bit we need to guard // against overflow, we use substraction to check for expiry which work // as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) -static void mi_page_reset_set_expire(mi_page_t* page) { +static void mi_page_purge_set_expire(mi_page_t* page) { + mi_assert_internal(page->used == 0); uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_purge_delay); page->used = expire; } -static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) { +static bool mi_page_purge_is_expired(mi_page_t* page, mi_msecs_t now) { int32_t expire = (int32_t)(page->used); return (((int32_t)now - expire) >= 0); } -static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(!page->segment_in_use || !page->is_committed); +static void mi_segment_schedule_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(!page->segment_in_use); mi_assert_internal(mi_page_not_in_queue(page,tld)); - mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); mi_assert_internal(_mi_page_segment(page)==segment); - if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_pinned || page->segment_in_use || !page->is_committed || page->is_reset) return; + if (!segment->allow_purge) return; if (mi_option_get(mi_option_purge_delay) == 0) { - // reset immediately? - mi_page_reset(segment, page, 0, tld); + // purge immediately? 
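(Editor's note: the expiry functions just above keep only a 32-bit deadline in `page->used` while the clock is 64-bit. A stand-alone sketch of why the subtraction-based check stays correct across 32-bit wrap-around, written with unsigned arithmetic so the example is strictly defined; the delay must stay small relative to the 32-bit range, and the code above conservatively documents under 2^30 - 1 ms. Names here are simplified stand-ins.)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// same idea as mi_page_purge_set_expire: store only the low 32 bits of the clock
static uint32_t set_expire(int64_t now_ms, uint32_t delay_ms) {
  return (uint32_t)now_ms + delay_ms;
}

// same subtraction trick as mi_page_purge_is_expired: the difference,
// interpreted as a signed value, is non-negative once the deadline has passed
static bool is_expired(uint32_t expire, int64_t now_ms) {
  uint32_t diff = (uint32_t)now_ms - expire;
  return (diff < 0x80000000u);
}

int main(void) {
  int64_t now = 0xFFFFFF00;                 // clock value close to the 32-bit boundary
  uint32_t expire = set_expire(now, 1000);  // deadline wraps past 2^32
  printf("expired right away: %d\n", is_expired(expire, now));         // 0
  printf("expired after 2s:   %d\n", is_expired(expire, now + 2000));  // 1
  return 0;
}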
+ mi_page_purge(segment, page, tld); } else { // otherwise push on the delayed page reset queue - mi_page_queue_t* pq = &tld->pages_reset; + mi_page_queue_t* pq = &tld->pages_purge; // push on top - mi_page_reset_set_expire(page); + mi_page_purge_set_expire(page); page->next = pq->first; page->prev = NULL; if (pq->first == NULL) { @@ -314,13 +314,14 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen } } -static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { +static void mi_page_purge_remove(mi_page_t* page, mi_segments_tld_t* tld) { if (mi_page_not_in_queue(page,tld)) return; - mi_page_queue_t* pq = &tld->pages_reset; + mi_page_queue_t* pq = &tld->pages_purge; mi_assert_internal(pq!=NULL); mi_assert_internal(!page->segment_in_use); - mi_assert_internal(mi_pages_reset_contains(page, tld)); + mi_assert_internal(page->used != 0); + mi_assert_internal(mi_pages_purge_contains(page, tld)); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == pq->last) pq->last = page->prev; @@ -329,14 +330,14 @@ static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { page->used = 0; } -static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool force_reset, mi_segments_tld_t* tld) { +static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge, mi_segments_tld_t* tld) { if (segment->mem_is_pinned) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use && page->is_committed && !page->is_reset) { - mi_pages_reset_remove(page, tld); - if (force_reset) { - mi_page_reset(segment, page, 0, tld); + if (!page->segment_in_use) { + mi_page_purge_remove(page, tld); + if (force_purge) { + mi_page_purge(segment, page, tld); } } else { @@ -345,16 +346,16 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for } } -static void mi_reset_delayed(mi_segments_tld_t* tld) { - if (!mi_option_is_enabled(mi_option_page_reset)) return; +static void mi_pages_try_purge(mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_allow_purge)) return; + mi_msecs_t now = _mi_clock_now(); - mi_page_queue_t* pq = &tld->pages_reset; + mi_page_queue_t* pq = &tld->pages_purge; // from oldest up to the first that has not expired yet mi_page_t* page = pq->last; - while (page != NULL && mi_page_reset_is_expired(page,now)) { + while (page != NULL && mi_page_purge_is_expired(page,now)) { mi_page_t* const prev = page->prev; // save previous field - mi_page_reset(_mi_page_segment(page), page, 0, tld); - page->used = 0; + mi_page_purge(_mi_page_segment(page), page, tld); page->prev = page->next = NULL; page = prev; } @@ -477,27 +478,17 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - bool any_reset = false; bool fully_committed = true; size_t committed = 0; const size_t page_size = mi_segment_raw_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->is_committed) { committed += page_size; } - if (!page->is_committed) { fully_committed = false; } - if (page->is_reset) { any_reset = true; } + if (!page->is_committed) { fully_committed = false; } } - MI_UNUSED(any_reset); MI_UNUSED(fully_committed); mi_assert_internal((fully_committed && committed == segment_size) 
|| (!fully_committed && committed < segment_size)); - - // TODO: for now, pages always reset but we can purge instead allowing for pages to be decommitted. - /* - if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { - fully_committed = false; - } - */ - + _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged) _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, committed, tld->stats); } @@ -507,8 +498,8 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { MI_UNUSED(tld); #if MI_DEBUG>=2 if (!_mi_is_main_thread()) { - mi_assert_internal(tld->pages_reset.first == NULL); - mi_assert_internal(tld->pages_reset.last == NULL); + mi_assert_internal(tld->pages_purge.first == NULL); + mi_assert_internal(tld->pages_purge.last == NULL); } #endif } @@ -551,10 +542,13 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme mi_track_mem_undefined(segment, info_size); MI_UNUSED(info_size); segment->memid = memid; - segment->mem_is_pinned = (mem_large || is_pinned); + segment->mem_is_pinned = is_pinned; + segment->mem_is_large = mem_large; segment->mem_is_committed = commit; segment->mem_alignment = alignment; segment->mem_align_offset = align_offset; + segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; + segment->allow_purge = segment->allow_decommit && mi_option_is_enabled(mi_option_allow_purge); mi_segments_track_size((long)(*segment_size), tld); _mi_segment_map_allocated_at(segment); return segment; @@ -610,7 +604,6 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, for (size_t i = 0; i < capacity; i++) { mi_assert_internal(i <= 255); segment->pages[i].segment_idx = (uint8_t)i; - segment->pages[i].is_reset = false; segment->pages[i].is_committed = commit; segment->pages[i].is_zero_init = is_zero; } @@ -641,11 +634,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { MI_UNUSED(force); mi_assert(segment != NULL); - // note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse) - bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset)); - mi_pages_reset_remove_all_in_segment(segment, force_reset, tld); - mi_segment_remove_from_free_queue(segment,tld); - + // don't purge as we are freeing now + mi_segment_remove_all_purges(segment, false /* don't force as we are about to free */, tld); + mi_segment_remove_from_free_queue(segment, tld); + mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); @@ -668,35 +660,15 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(_mi_page_segment(page) == segment); mi_assert_internal(!page->segment_in_use); - mi_pages_reset_remove(page, tld); + mi_page_purge_remove(page, tld); + // check commit - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_pinned); - mi_assert_internal(!page->is_reset); - size_t psize; - uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - bool is_zero = false; - const size_t gsize = (MI_SECURE >= 2 ? 
_mi_os_page_size() : 0); - bool ok = _mi_os_commit(start, psize + gsize, &is_zero, tld->stats); - if (!ok) return false; // failed to commit! - if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) { page->is_zero_init = true; } - page->is_committed = true; - } + if (!mi_page_ensure_committed(segment, page, tld)) return false; + // set in-use before doing unreset to prevent delayed reset page->segment_in_use = true; - segment->used++; - // check reset - if (page->is_reset) { - mi_assert_internal(!segment->mem_is_pinned); - bool ok = mi_page_unreset(segment, page, 0, tld); - if (!ok) { - page->segment_in_use = false; - segment->used--; - return false; - } - } - mi_assert_internal(page->segment_in_use); + segment->used++; + mi_assert_internal(page->segment_in_use && page->is_committed && page->used==0 && !mi_pages_purge_contains(page,tld)); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) { // if no more free pages, remove from the queue @@ -714,7 +686,7 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); // clear page data; can be called on abandoned segments -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld) +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); @@ -725,35 +697,24 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - // calculate the used size from the raw (non-aligned) start of the page - //size_t pre_size; - //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); - //size_t used_size = pre_size + (page->capacity * page->block_size); - page->is_zero_init = false; page->segment_in_use = false; - // reset the page memory to reduce memory pressure? 
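(Editor's summary of the new page life-cycle in the code above, as a stand-alone sketch with simplified stand-ins rather than the mimalloc types: a freed page is purged, which may leave it decommitted when the purge decommits, and claiming the page later recommits it on demand, replacing the old reset/unreset pair.)

#include <stdbool.h>
#include <stdio.h>

typedef struct page_s {
  bool segment_in_use;
  bool is_committed;
} page_t;

// stand-in for _mi_os_purge: returns true when the purge decommitted,
// i.e. the memory must be recommitted before reuse
static bool os_purge(bool purge_decommits) {
  return purge_decommits;
}

static void page_purge(page_t* page, bool purge_decommits) {
  if (page->segment_in_use) return;          // only free pages are purged
  bool needs_recommit = os_purge(purge_decommits);
  if (needs_recommit) { page->is_committed = false; }
}

static bool page_claim(page_t* page) {
  if (!page->is_committed) {
    page->is_committed = true;               // commit on demand (cf. mi_page_ensure_committed)
  }
  page->segment_in_use = true;
  return true;
}

int main(void) {
  page_t page = { false, true };
  page_purge(&page, true);                   // purge via decommit
  printf("committed after purge: %d\n", page.is_committed);  // 0
  page_claim(&page);                         // recommit on reuse
  printf("committed after claim: %d\n", page.is_committed);  // 1
  return 0;
}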
- // note: must come after setting `segment_in_use` to false but before block_size becomes 0 - //mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields and capacity, and block_size (for page size calculations) uint32_t block_size = page->xblock_size; uint16_t capacity = page->capacity; uint16_t reserved = page->reserved; ptrdiff_t ofs = offsetof(mi_page_t,capacity); - memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs); page->capacity = capacity; page->reserved = reserved; page->xblock_size = block_size; segment->used--; - // add to the free page list for reuse/reset - if (allow_reset) { - mi_pages_reset_add(segment, page, tld); - } + // schedule purge + mi_segment_schedule_purge(segment, page, tld); - page->capacity = 0; // after reset these can be zero'd now + page->capacity = 0; // after purge these can be zero'd now page->reserved = 0; } @@ -762,10 +723,10 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - mi_reset_delayed(tld); + mi_pages_try_purge(tld); // mark it as free now - mi_segment_page_clear(segment, page, true, tld); + mi_segment_page_clear(segment, page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -969,8 +930,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_expensive(mi_segment_is_valid(segment, tld)); // remove the segment from the free page queue if needed - mi_reset_delayed(tld); - mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld); + mi_pages_try_purge(tld); + mi_segment_remove_all_purges(segment, mi_option_is_enabled(mi_option_abandoned_page_purge), tld); mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); @@ -988,7 +949,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); @@ -1056,7 +1017,6 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { - mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); mi_assert_internal(mi_page_not_in_queue(page, tld)); mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); @@ -1071,7 +1031,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { // if everything free already, clear the page directly - mi_segment_page_clear(segment, page, true, tld); // reset is ok now + mi_segment_page_clear(segment, page, tld); // reset is ok now } else { // otherwise reclaim it into the heap @@ -1081,10 +1041,12 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } } } - else if 
(page->is_committed && !page->is_reset) { // not in-use, and not reset yet + /* expired + else if (page->is_committed) { // not in-use, and not reset yet // note: do not reset as this includes pages that were not touched before - // mi_pages_reset_add(segment, page, tld); + // mi_pages_purge_add(segment, page, tld); } + */ } mi_assert_internal(segment->abandoned == 0); if (segment->used == 0) { @@ -1271,7 +1233,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : (uint32_t)psize); // reset the part of the page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) - if (page_alignment > 0 && !segment->mem_is_pinned && page->is_committed) { + if (page_alignment > 0 && segment->allow_decommit && page->is_committed) { uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); mi_assert_internal(psize - (aligned_p - start) >= size); @@ -1319,7 +1281,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); - if (!segment->mem_is_pinned && page->is_committed) { + if (segment->allow_decommit && page->is_committed) { const size_t usize = mi_usable_size(block) - sizeof(mi_block_t); uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); _mi_os_reset(p, usize, &_mi_stats_main); @@ -1354,7 +1316,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); - mi_reset_delayed(tld); + // mi_segment_try_purge(tld); mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; } From 96b02dda1ff02db716b48d86e60fcf67f3593b45 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 8 Apr 2023 17:55:07 -0700 Subject: [PATCH 034/102] fix accidental cmake move --- cmake/JoinPaths.cmake | 23 +++++++++++++++++++++++ cmake/mimalloc-config-version.cmake | 19 +++++++++++++++++++ cmake/mimalloc-config.cmake | 14 ++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 cmake/JoinPaths.cmake create mode 100644 cmake/mimalloc-config-version.cmake create mode 100644 cmake/mimalloc-config.cmake diff --git a/cmake/JoinPaths.cmake b/cmake/JoinPaths.cmake new file mode 100644 index 00000000..c68d91b8 --- /dev/null +++ b/cmake/JoinPaths.cmake @@ -0,0 +1,23 @@ +# This module provides function for joining paths +# known from most languages +# +# SPDX-License-Identifier: (MIT OR CC0-1.0) +# Copyright 2020 Jan Tojnar +# https://github.com/jtojnar/cmake-snips +# +# Modelled after Python’s os.path.join +# https://docs.python.org/3.7/library/os.path.html#os.path.join +# Windows not supported +function(join_paths joined_path first_path_segment) + set(temp_path "${first_path_segment}") + foreach(current_segment IN LISTS ARGN) + if(NOT ("${current_segment}" STREQUAL "")) + if(IS_ABSOLUTE "${current_segment}") + set(temp_path "${current_segment}") + else() + set(temp_path "${temp_path}/${current_segment}") + endif() + endif() + endforeach() + set(${joined_path} "${temp_path}" PARENT_SCOPE) +endfunction() diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake new file mode 100644 index 00000000..a44c121d --- /dev/null +++ b/cmake/mimalloc-config-version.cmake @@ -0,0 +1,19 @@ +set(mi_version_major 2) +set(mi_version_minor 1) +set(mi_version_patch 2) +set(mi_version ${mi_version_major}.${mi_version_minor}) + +set(PACKAGE_VERSION ${mi_version}) +if(PACKAGE_FIND_VERSION_MAJOR) + if("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "${mi_version_major}") + if ("${PACKAGE_FIND_VERSION_MINOR}" EQUAL "${mi_version_minor}") + set(PACKAGE_VERSION_EXACT TRUE) + elseif("${PACKAGE_FIND_VERSION_MINOR}" LESS "${mi_version_minor}") + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_UNSUITABLE TRUE) + endif() + else() + set(PACKAGE_VERSION_UNSUITABLE TRUE) + endif() +endif() diff --git a/cmake/mimalloc-config.cmake b/cmake/mimalloc-config.cmake new file mode 100644 index 00000000..a49b02a2 --- /dev/null +++ b/cmake/mimalloc-config.cmake @@ -0,0 +1,14 @@ +include(${CMAKE_CURRENT_LIST_DIR}/mimalloc.cmake) +get_filename_component(MIMALLOC_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH) # one up from the cmake dir, e.g. 
/usr/local/lib/cmake/mimalloc-2.0 +get_filename_component(MIMALLOC_VERSION_DIR "${CMAKE_CURRENT_LIST_DIR}" NAME) +string(REPLACE "/lib/cmake" "/lib" MIMALLOC_LIBRARY_DIR "${MIMALLOC_CMAKE_DIR}") +if("${MIMALLOC_VERSION_DIR}" EQUAL "mimalloc") + # top level install + string(REPLACE "/lib/cmake" "/include" MIMALLOC_INCLUDE_DIR "${MIMALLOC_CMAKE_DIR}") + set(MIMALLOC_OBJECT_DIR "${MIMALLOC_LIBRARY_DIR}") +else() + # versioned + string(REPLACE "/lib/cmake/" "/include/" MIMALLOC_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}") + string(REPLACE "/lib/cmake/" "/lib/" MIMALLOC_OBJECT_DIR "${CMAKE_CURRENT_LIST_DIR}") +endif() +set(MIMALLOC_TARGET_DIR "${MIMALLOC_LIBRARY_DIR}") # legacy From a94b31d804313251008443fa040759b1d2e6aff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Horv=C3=A1th?= Date: Wed, 12 Apr 2023 16:59:49 -0700 Subject: [PATCH 035/102] Fix option name for ETW tracking --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 003cd8cf..7af7a264 100644 --- a/readme.md +++ b/readme.md @@ -518,7 +518,7 @@ Adress sanitizer support is in its initial development -- please report any issu ### ETW Event tracing for Windows ([ETW]) provides a high performance way to capture all allocations though -mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACE_ETW=ON` cmake option. +mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACK_ETW=ON` cmake option. You can then capture an allocation trace using the Windows performance recorder (WPR), using the `src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use: From 290443aee60e4402c1dcb740ba541718607bd4e4 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 13 Apr 2023 11:41:08 -0700 Subject: [PATCH 036/102] fix assertion/invariant --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 1ba2209d..5542705d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -355,8 +355,8 @@ static void mi_pages_try_purge(mi_segments_tld_t* tld) { mi_page_t* page = pq->last; while (page != NULL && mi_page_purge_is_expired(page,now)) { mi_page_t* const prev = page->prev; // save previous field + mi_page_purge_remove(page, tld); // remove from the list to maintain invariant for mi_page_purge mi_page_purge(_mi_page_segment(page), page, tld); - page->prev = page->next = NULL; page = prev; } // discard the reset pages from the queue From c90a2bbd0a579236b4aa0ec93f890188cc261d27 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 13 Apr 2023 12:17:52 -0700 Subject: [PATCH 037/102] make memid's abstract for safety --- include/mimalloc/internal.h | 13 ++--- include/mimalloc/types.h | 19 ++++++- src/arena.c | 98 ++++++++++++++++++++++++------------- src/heap.c | 2 +- src/segment.c | 2 +- 5 files changed, 90 insertions(+), 44 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 59f81ee1..e8cc8581 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -115,11 +115,12 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, size_t committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t 
align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id); -bool _mi_arena_is_os_allocated(size_t arena_memid); +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, mi_memid_t memid, size_t committed, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +bool _mi_arena_memid_is_os_allocated(mi_memid_t memid); +bool _mi_arena_memid_is_static_allocated(mi_memid_t memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); @@ -170,7 +171,7 @@ void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); void _mi_heap_destroy_all(void); -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ac61faae..90c00279 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -319,12 +319,29 @@ typedef enum mi_page_kind_e { MI_PAGE_HUGE // huge blocks (>512KiB) are put into a single page in a segment of the exact size (but still 2MiB aligned) } mi_page_kind_t; + +// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. +typedef enum mi_memkind_e { + MI_MEM_NONE, + MI_MEM_OS, + MI_MEM_STATIC, + MI_MEM_ARENA +} mi_memkind_t; + +typedef struct mi_memid_s { + size_t arena_idx; + mi_arena_id_t arena_id; + bool arena_is_exclusive; + mi_memkind_t memkind; +} mi_memid_t; + + // Segments are large allocated memory blocks (2MiB on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { // memory fields - size_t memid; // id for the os-level memory manager + mi_memid_t memid; // id for the os-level memory manager bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_large; // `true` if the memory is in OS large or huge pages. 
(`is_pinned` will be true) bool mem_is_committed; // `true` if the whole segment is eagerly committed diff --git a/src/arena.c b/src/arena.c index 530304a7..f9ba2f30 100644 --- a/src/arena.c +++ b/src/arena.c @@ -70,7 +70,6 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, /* ----------------------------------------------------------- Arena id's - 0 is used for non-arena's (like OS memory) id = arena_index + 1 ----------------------------------------------------------- */ @@ -80,10 +79,7 @@ static size_t mi_arena_id_index(mi_arena_id_t id) { static mi_arena_id_t mi_arena_id_create(size_t arena_index) { mi_assert_internal(arena_index < MI_MAX_ARENAS); - mi_assert_internal(MI_MAX_ARENAS <= 126); - int id = (int)arena_index + 1; - mi_assert_internal(id >= 1 && id <= 127); - return id; + return (int)arena_index + 1; } mi_arena_id_t _mi_arena_id_none(void) { @@ -95,36 +91,67 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus (arena_id == req_arena_id)); } - /* ----------------------------------------------------------- - Arena allocations get a memory id where the lower 8 bits are - the arena id, and the upper bits the block index. + memory id's ----------------------------------------------------------- */ -// Use `0` as a special id for direct OS allocated memory. -#define MI_MEMID_OS 0 - -static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_index_t bitmap_index) { - mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? - mi_assert_internal(id >= 0 && id <= 0x7F); - return ((bitmap_index << 8) | ((uint8_t)id & 0x7F) | (exclusive ? 0x80 : 0)); +static mi_memid_t mi_arena_memid_none(void) { + mi_memid_t memid; + memid.memkind = MI_MEM_NONE; + memid.arena_id = 0; + memid.arena_idx = 0; + memid.arena_is_exclusive = false; + return memid; } -static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - *bitmap_index = (arena_memid >> 8); - mi_arena_id_t id = (int)(arena_memid & 0x7F); - *arena_index = mi_arena_id_index(id); - return ((arena_memid & 0x80) != 0); +static mi_memid_t mi_arena_memid_os(void) { + mi_memid_t memid = mi_arena_memid_none(); + memid.memkind = MI_MEM_OS; + return memid; } -bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { - mi_arena_id_t id = (int)(arena_memid & 0x7F); - bool exclusive = ((arena_memid & 0x80) != 0); - return mi_arena_id_is_suitable(id, exclusive, request_arena_id); +/* +static mi_memid_t mi_arena_memid_static(void) { + mi_memid_t memid = mi_arena_memid_none(); + memid.memkind = MI_MEM_STATIC; + return memid; +} +*/ + +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { + // note: works also for OS and STATIC memory with a zero arena_id. + return mi_arena_id_is_suitable(memid.arena_id, memid.arena_is_exclusive, request_arena_id); } -bool _mi_arena_is_os_allocated(size_t arena_memid) { - return (arena_memid == MI_MEMID_OS); +bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) { + return (memid.memkind == MI_MEM_OS); +} + +bool _mi_arena_is_static_allocated(mi_memid_t memid) { + return (memid.memkind == MI_MEM_STATIC); +} + + + +/* ----------------------------------------------------------- + Arena allocations get a (currently) 16-bit memory id where the + lower 8 bits are the arena id, and the upper bits the block index. 
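(Editor's note: a stand-alone illustration of what the abstract memid above provides compared with the old packed size_t, with simplified stand-in names. The kind, arena id, bitmap index and exclusivity are separate fields, and the suitability rule follows the one used in the patch: an exclusive arena only serves requests that name it, while id 0 means "no specific arena".)

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef int arena_id_t;                 // 0 means "none", as in mimalloc

typedef enum memkind_e { MEM_NONE, MEM_OS, MEM_STATIC, MEM_ARENA } memkind_t;

typedef struct memid_s {
  size_t     arena_idx;                 // bitmap index within the arena
  arena_id_t arena_id;                  // 0 for OS or static memory
  bool       arena_is_exclusive;        // allocated from an exclusive arena?
  memkind_t  memkind;
} memid_t;

static memid_t memid_create_arena(arena_id_t id, bool exclusive, size_t bitmap_idx) {
  memid_t m = { bitmap_idx, id, exclusive, MEM_ARENA };
  return m;
}

// suitability: non-exclusive memory serves generic requests; otherwise the
// request must name exactly this arena
static bool memid_is_suitable(memid_t m, arena_id_t req_arena_id) {
  return ((!m.arena_is_exclusive && req_arena_id == 0) || m.arena_id == req_arena_id);
}

int main(void) {
  memid_t m = memid_create_arena(3, true, 42);
  printf("suitable for any heap: %d\n", memid_is_suitable(m, 0));  // 0 (exclusive)
  printf("suitable for arena 3:  %d\n", memid_is_suitable(m, 3));  // 1
  return 0;
}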
+----------------------------------------------------------- */ + +static mi_memid_t mi_arena_memid_create(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { + mi_memid_t memid; + memid.memkind = MI_MEM_ARENA; + memid.arena_id = id; + memid.arena_idx = bitmap_index; + memid.arena_is_exclusive = is_exclusive; + return memid; +} + +static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { + mi_assert_internal(memid.memkind == MI_MEM_ARENA); + *arena_index = mi_arena_id_index(memid.arena_id); + *bitmap_index = memid.arena_idx; + return memid.arena_is_exclusive; } static size_t mi_block_count_of_size(size_t size) { @@ -163,7 +190,7 @@ static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { MI_UNUSED(arena_index); mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); @@ -214,7 +241,7 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_ // allocate in a speficic arena static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); @@ -236,7 +263,7 @@ static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, int numa_node, size_t // allocate from an arena with fallback to the OS static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); @@ -317,12 +344,13 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0); } + void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; + *memid = mi_arena_memid_none(); *is_zero = false; *is_pinned = false; @@ -350,13 +378,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset return NULL; } - *memid = MI_MEMID_OS; + *memid = mi_arena_memid_os(); void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, is_zero, tld->stats); if (p != NULL) { *is_pinned = *large; } return p; } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* 
memid, mi_os_tld_t* tld) { return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); } @@ -538,7 +566,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, size_t committed_size, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, mi_memid_t memid, size_t committed_size, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); mi_assert_internal(committed_size <= size); if (p==NULL) return; @@ -546,7 +574,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, const bool all_committed = (committed_size == size); - if (memid == MI_MEMID_OS) { + if (_mi_arena_memid_is_os_allocated(memid)) { // was a direct OS allocation, pass through if (!all_committed && committed_size > 0) { // if partially committed, adjust the committed stats diff --git a/src/heap.c b/src/heap.c index 53923cf6..52087234 100644 --- a/src/heap.c +++ b/src/heap.c @@ -221,7 +221,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { return mi_heap_new_in_arena(_mi_arena_id_none()); } -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) { return _mi_arena_memid_is_suitable(memid, heap->arena_id); } diff --git a/src/segment.c b/src/segment.c index 5542705d..b8651caf 100644 --- a/src/segment.c +++ b/src/segment.c @@ -513,7 +513,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme size_t pre_size, size_t info_size, size_t* segment_size, bool* is_zero, bool* commit, mi_segments_tld_t* tld, mi_os_tld_t* tld_os) { - size_t memid; + mi_memid_t memid; bool mem_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; size_t align_offset = 0; From 0ba79d01f61854064333ac03c536aa49632ba618 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 13 Apr 2023 13:19:39 -0700 Subject: [PATCH 038/102] allow static allocation in arenas for internal metadata --- include/mimalloc/internal.h | 4 +- src/arena.c | 92 ++++++++++++++++++++++++++++++------- src/init.c | 21 ++++++--- 3 files changed, 91 insertions(+), 26 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e8cc8581..2bf57fd0 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -119,10 +119,10 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_o void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); -bool _mi_arena_memid_is_os_allocated(mi_memid_t memid); -bool _mi_arena_memid_is_static_allocated(mi_memid_t memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); +void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); +void _mi_arena_meta_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* 
stats); // "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); diff --git a/src/arena.c b/src/arena.c index f9ba2f30..0440e481 100644 --- a/src/arena.c +++ b/src/arena.c @@ -47,6 +47,7 @@ typedef struct mi_arena_s { size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) size_t meta_size; // size of the arena structure itself including the bitmaps + mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL @@ -110,28 +111,18 @@ static mi_memid_t mi_arena_memid_os(void) { return memid; } -/* static mi_memid_t mi_arena_memid_static(void) { mi_memid_t memid = mi_arena_memid_none(); memid.memkind = MI_MEM_STATIC; return memid; } -*/ + bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { // note: works also for OS and STATIC memory with a zero arena_id. return mi_arena_id_is_suitable(memid.arena_id, memid.arena_is_exclusive, request_arena_id); } -bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) { - return (memid.memkind == MI_MEM_OS); -} - -bool _mi_arena_is_static_allocated(mi_memid_t memid) { - return (memid.memkind == MI_MEM_STATIC); -} - - /* ----------------------------------------------------------- Arena allocations get a (currently) 16-bit memory id where the @@ -167,6 +158,70 @@ static size_t mi_arena_size(mi_arena_t* arena) { } +/* ----------------------------------------------------------- + Special static area for mimalloc internal structures + to avoid OS calls (for example, for the arena and thread + metadata) +----------------------------------------------------------- */ + +#define MI_ARENA_STATIC_MAX (MI_INTPTR_SIZE*8*MI_KiB) // 64 KiB on 64-bit + +static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; +static _Atomic(size_t) mi_arena_static_top; + +static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { + *memid = mi_arena_memid_static(); + if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; + if (mi_atomic_load_relaxed(&mi_arena_static_top) >= MI_ARENA_STATIC_MAX) return NULL; + + // try to claim space + if (alignment == 0) { alignment = 1; } + const size_t oversize = size + alignment - 1; + if (oversize > MI_ARENA_STATIC_MAX) return NULL; + const size_t oldtop = mi_atomic_add_acq_rel(&mi_arena_static_top, oversize); + size_t top = oldtop + oversize; + if (top > MI_ARENA_STATIC_MAX) { + // try to roll back, ok if this fails + mi_atomic_cas_strong_acq_rel(&mi_arena_static_top, &top, oldtop); + return NULL; + } + + // success + *memid = mi_arena_memid_static(); + const size_t start = _mi_align_up(oldtop, alignment); + uint8_t* const p = &mi_arena_static[start]; + _mi_memzero(p, size); + return p; +} + +void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { + *memid = mi_arena_memid_none(); + + // try static + void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid); + if (p != NULL) { + *memid = mi_arena_memid_static(); + return p; + } + + // or fall back to the OS + bool is_zero = false; + p = _mi_os_alloc(size, &is_zero, stats); + if (p != NULL) { + *memid = mi_arena_memid_os(); + if (!is_zero) { _mi_memzero(p, size); } + return p; + } + + return NULL; +} + +void 
_mi_arena_meta_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) { + if (memid.memkind == MI_MEM_OS) { + _mi_os_free(p, size, stats); + } +} + /* ----------------------------------------------------------- Thread safe allocation in an arena @@ -573,8 +628,10 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, if (size==0) return; const bool all_committed = (committed_size == size); - - if (_mi_arena_memid_is_os_allocated(memid)) { + if (memid.memkind == MI_MEM_STATIC) { + // nothing to do + } + else if (memid.memkind == MI_MEM_OS) { // was a direct OS allocation, pass through if (!all_committed && committed_size > 0) { // if partially committed, adjust the committed stats @@ -660,7 +717,7 @@ static void mi_arenas_destroy(void) { else { _mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main); } - _mi_os_free(arena, arena->meta_size, &_mi_stats_main); + _mi_arena_meta_free(arena, arena->meta_size, arena->meta_memid, &_mi_stats_main); } else { new_max_arena = i; @@ -731,16 +788,17 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); const size_t bitmaps = (allow_decommit ? 4 : 2); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, NULL, &_mi_stats_main); // TODO: can we avoid allocating from the OS? + mi_memid_t meta_memid; + mi_arena_t* arena = (mi_arena_t*)_mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; - _mi_memzero(arena, asize); - + // already zero'd due to os_alloc // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); arena->exclusive = exclusive; arena->owned = owned; arena->meta_size = asize; + arena->meta_memid = meta_memid; arena->block_count = bcount; arena->field_count = fields; arena->start = (uint8_t*)start; diff --git a/src/init.c b/src/init.c index 7a768d75..b2444cfc 100644 --- a/src/init.c +++ b/src/init.c @@ -177,6 +177,7 @@ mi_heap_t* _mi_heap_main_get(void) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_memid_t memid; } mi_thread_data_t; @@ -188,28 +189,35 @@ typedef struct mi_thread_data_s { #define TD_CACHE_SIZE (8) static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; -static mi_thread_data_t* mi_thread_data_alloc(void) { +static mi_thread_data_t* mi_thread_data_zalloc(void) { // try to find thread metadata in the cache mi_thread_data_t* td; for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { + // found cached allocation, try use it td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { + _mi_memzero(td, sizeof(*td)); return td; } } } - // if that fails, allocate directly from the OS - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), NULL, &_mi_stats_main); + + // if that fails, allocate as meta data + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_arena_meta_zalloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { // if this fails, try once more. 
(issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), NULL, &_mi_stats_main); + td = (mi_thread_data_t*)_mi_arena_meta_zalloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { // really out of memory _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); } } + if (td != NULL) { + td->memid = memid; + } return td; } @@ -225,7 +233,7 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) { } } // if that fails, just free it directly - _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); + _mi_arena_meta_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); } static void mi_thread_data_collect(void) { @@ -253,10 +261,9 @@ static bool _mi_heap_init(void) { } else { // use `_mi_os_alloc` to allocate directly from the OS - mi_thread_data_t* td = mi_thread_data_alloc(); + mi_thread_data_t* td = mi_thread_data_zalloc(); if (td == NULL) return false; - // OS allocated so already zero initialized mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap)); From 48d0d0da9b682882923963a98ff45d8ffa88e9d3 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 13 Apr 2023 15:27:20 -0700 Subject: [PATCH 039/102] fix thread data cache to use pure os alloc --- include/mimalloc/internal.h | 2 -- src/arena.c | 8 ++++---- src/init.c | 26 ++++++++++++++------------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2bf57fd0..8b539876 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -121,8 +121,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_o bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); -void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); -void _mi_arena_meta_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); // "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); diff --git a/src/arena.c b/src/arena.c index 0440e481..556e9cc6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -194,7 +194,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m return p; } -void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { +static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { *memid = mi_arena_memid_none(); // try static @@ -216,7 +216,7 @@ void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { return NULL; } -void _mi_arena_meta_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) { +static void mi_arena_meta_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) { if (memid.memkind == MI_MEM_OS) { _mi_os_free(p, size, stats); } @@ -717,7 +717,7 @@ static void mi_arenas_destroy(void) { else { _mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main); } - _mi_arena_meta_free(arena, arena->meta_size, arena->meta_memid, &_mi_stats_main); + mi_arena_meta_free(arena, arena->meta_size, arena->meta_memid, &_mi_stats_main); } else { new_max_arena = i; @@ -789,7 +789,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, const size_t bitmaps = (allow_decommit ? 
4 : 2); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); mi_memid_t meta_memid; - mi_arena_t* arena = (mi_arena_t*)_mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? + mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; // already zero'd due to os_alloc diff --git a/src/init.c b/src/init.c index b2444cfc..8896e138 100644 --- a/src/init.c +++ b/src/init.c @@ -191,32 +191,34 @@ static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; static mi_thread_data_t* mi_thread_data_zalloc(void) { // try to find thread metadata in the cache - mi_thread_data_t* td; + bool is_zero = false; + mi_thread_data_t* td = NULL; for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { // found cached allocation, try use it td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - _mi_memzero(td, sizeof(*td)); - return td; + break; } } } // if that fails, allocate as meta data - mi_memid_t memid; - td = (mi_thread_data_t*)_mi_arena_meta_zalloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_arena_meta_zalloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main); if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + // if this fails, try once more. (issue #257) + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main); + if (td == NULL) { + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + } } } - if (td != NULL) { - td->memid = memid; + + if (td != NULL && !is_zero) { + _mi_memzero(td, sizeof(*td)); } return td; } @@ -233,7 +235,7 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) { } } // if that fails, just free it directly - _mi_arena_meta_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); + _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); } static void mi_thread_data_collect(void) { From e35e919ea43e63b29738ca94173b2252a7b3b6f1 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 13 Apr 2023 15:37:54 -0700 Subject: [PATCH 040/102] remove segment-cache as it is superseded by better arena management --- CMakeLists.txt | 1 - ide/vs2022/mimalloc-override.vcxproj | 1 - ide/vs2022/mimalloc.vcxproj | 1 - include/mimalloc/internal.h | 7 - src/heap.c | 4 - src/init.c | 1 - src/segment-cache.c | 277 --------------------------- src/segment.c | 36 +--- src/static.c | 1 - 9 files changed, 5 insertions(+), 324 deletions(-) delete mode 100644 src/segment-cache.c diff --git a/CMakeLists.txt b/CMakeLists.txt index de2689a3..2bcd1ef7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,6 @@ set(mi_sources src/page.c src/random.c src/segment.c - src/segment-cache.c src/segment-map.c src/stats.c src/prim/prim.c) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 52ed5282..e2c7f71d 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -257,7 +257,6 @@ - diff 
--git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 33a719c1..2916483d 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -235,7 +235,6 @@ - diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index ab8c0d28..f4a08a09 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -120,16 +120,9 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_o void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); -bool _mi_arena_memid_is_os_allocated(mi_memid_t memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); -// "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, mi_memid_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); -void _mi_segment_cache_free_all(mi_os_tld_t* tld); - // "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); diff --git a/src/heap.c b/src/heap.c index 9238812b..14c3d66c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -163,10 +163,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_segment_thread_collect(&heap->tld->segments); } - // decommit in global segment caches - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment - _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); - // collect regions on program-exit (or shared library unload) if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { _mi_arena_collect(false /* destroy arenas */, true /* force purge */, &heap->tld->stats); diff --git a/src/init.c b/src/init.c index 5fb1ae43..17dc2faf 100644 --- a/src/init.c +++ b/src/init.c @@ -632,7 +632,6 @@ static void mi_cdecl mi_process_done(void) { // or C-runtime termination code. if (mi_option_is_enabled(mi_option_destroy_on_exit)) { _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments _mi_arena_collect(true /* destroy (owned) arenas */, true /* purge the rest */, &_mi_heap_main_get()->tld->stats); } diff --git a/src/segment-cache.c b/src/segment-cache.c deleted file mode 100644 index 2aee27c6..00000000 --- a/src/segment-cache.c +++ /dev/null @@ -1,277 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
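/* -----------------------------------------------------------
   Background for the deletion below: the segment cache, like the arenas,
   hands out slots by claiming bits in atomic bitmaps (see mi_arena_try_claim
   and the _mi_bitmap_try_find_from_claim calls in the removed code). The
   standalone sketch that follows (demo_* names are illustrative, not
   mimalloc code) shows the core claim/unclaim pattern for a single bitmap
   field; the real bitmaps span many fields and can claim runs of bits.
----------------------------------------------------------- */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

// try to claim one free (zero) bit in `field`; on success return its index
static bool demo_bitmap_try_claim(_Atomic(size_t)* field, size_t* bit_idx) {
  size_t map = atomic_load_explicit(field, memory_order_relaxed);
  while (map != (size_t)-1) {                            // any zero bit left?
    size_t idx = 0;
    while ((map & ((size_t)1 << idx)) != 0) { idx++; }   // find the first zero bit
    const size_t mask = (size_t)1 << idx;
    if (atomic_compare_exchange_weak_explicit(field, &map, map | mask,
                                              memory_order_acq_rel, memory_order_relaxed)) {
      *bit_idx = idx;                                    // claimed: we own this slot now
      return true;
    }
    // CAS failed: `map` was reloaded with the current value, scan again
  }
  return false;                                          // the field is full
}

static void demo_bitmap_unclaim(_Atomic(size_t)* field, size_t bit_idx) {
  atomic_fetch_and_explicit(field, ~((size_t)1 << bit_idx), memory_order_release);
}

int main(void) {
  _Atomic(size_t) field = 0;
  size_t idx;
  if (demo_bitmap_try_claim(&field, &idx)) {
    printf("claimed bit %zu\n", idx);
    demo_bitmap_unclaim(&field, idx);
  }
  return 0;
}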
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- - Implements a cache of segments to avoid expensive OS calls and to reuse - the commit_mask to optimize the commit/decommit calls. - The full memory map of all segments is also implemented here. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "mimalloc/atomic.h" - -#include "./bitmap.h" // atomic bitmap - -// #define MI_CACHE_DISABLE 1 // define to completely disable the segment cache - -#define MI_CACHE_FIELDS (16) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit - -#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) -#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes - -typedef struct mi_cache_slot_s { - void* p; - mi_memid_t memid; - bool is_pinned; - mi_commit_mask_t commit_mask; - mi_commit_mask_t purge_mask; - _Atomic(mi_msecs_t) expire; -} mi_cache_slot_t; - -static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 - -static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! -static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; -static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free - -static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { - mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); -} - -mi_decl_noinline static void* mi_segment_cache_pop_ex( - bool all_suitable, - size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* purge_mask, bool large_allowed, - bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t _req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return NULL; -#else - - // only segment blocks - if (size != MI_SEGMENT_SIZE) return NULL; - - // numa node determines start field - const int numa_node = _mi_os_numa_node(tld); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot and make it unavailable - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - mi_arena_id_t req_arena_id = _req_arena_id; - mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - - if (large_allowed) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // no longer available but still in-use - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_pinned = slot->is_pinned; - *is_zero = false; - *commit_mask = slot->commit_mask; - *purge_mask = slot->purge_mask; - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - - // mark the slot as free again - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -#endif -} - - -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) -{ - return mi_segment_cache_pop_ex(false, size, commit_mask, purge_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) -{ - if (mi_commit_mask_is_empty(cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(cmask)) { - // decommit the whole in one call - _mi_os_decommit(p, total, stats); - } - else { - // decommit parts - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - size_t idx; - size_t count; - mi_commit_mask_foreach(cmask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - mi_commit_mask_create_empty(cmask); -} - -#define MI_MAX_PURGE_PER_PUSH (4) - -static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) -{ - MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_purge)) return; - mi_msecs_t now = _mi_clock_now(); - size_t purged = 0; - const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (visit_all ? 
0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); - for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways - // it was available, we claimed it (and made it unavailable) - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); - // we can now access it safely - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // safe read - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - // TODO: instead of decommit, we could also free to the OS? - mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->purge_mask); - } - _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push - } - } -} - -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - if (force) { - // called on `mi_collect(true)` but not on thread termination - _mi_segment_cache_free_all(tld); - } - else { - mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); - } -} - -void _mi_segment_cache_free_all(mi_os_tld_t* tld) { - mi_commit_mask_t commit_mask; - mi_commit_mask_t purge_mask; - bool is_pinned; - bool is_zero; - bool is_large; - mi_memid_t memid; - const size_t size = MI_SEGMENT_SIZE; - void* p; - do { - // keep popping and freeing the memory - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &purge_mask, - true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); -} - -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, mi_memid_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return false; -#else - - // purge expired entries - mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - - // only cache normal segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // Also do not cache arena allocated segments that cannot be decommitted. 
(as arena allocation is fast) - // This is a common case with reserved huge OS pages. - // - // (note: we could also allow segments that are already fully decommitted but that never happens - // as the first slice is always committed (for the segment metadata)) - if (!_mi_arena_memid_is_os_allocated(memid) && is_pinned) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); -#if MI_DEBUG>1 - if (is_pinned || is_large) { - mi_assert_internal(mi_commit_mask_is_full(commit_mask)); - } -#endif - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - slot->is_pinned = is_pinned; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = *commit_mask; - slot->purge_mask = *purge_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_purge)) { - long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->purge_mask); - } - else { - mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx); - return true; -#endif -} diff --git a/src/segment.c b/src/segment.c index af4ed95c..7d0d2c28 100644 --- a/src/segment.c +++ b/src/segment.c @@ -11,7 +11,6 @@ terms of the MIT license. A copy of the license can be found in the file #include // memset #include -#define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); @@ -393,28 +392,11 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // purge delayed decommits now? (no, leave it to the arena) // mi_segment_try_purge(segment,true,tld->stats); - // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); -#if MI_USE_SEGMENT_CACHE - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache - || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->purge_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) -#endif - { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - /* - // if not all committed, an arena may decommit the whole area, but that double counts - // the already decommitted parts; adjust for that in the stats. 
- if (!mi_commit_mask_is_full(&segment->commit_mask)) { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - mi_assert_internal(size > csize); - if (size > csize) { - _mi_stat_increase(&_mi_stats_main.committed, size - csize); - } - } - */ - _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, csize, tld->stats); - } + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, csize, tld->stats); } // called by threads that are terminating @@ -819,6 +801,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + MI_UNUSED(ppurge_mask); mi_memid_t memid; bool mem_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; @@ -837,15 +820,6 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment } const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; - - #if MI_USE_SEGMENT_CACHE - // get from cache? - if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, ppurge_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); - } - #else - MI_UNUSED(ppurge_mask); - #endif // get from OS if (segment==NULL) { diff --git a/src/static.c b/src/static.c index 831e9ecd..bc05dd72 100644 --- a/src/static.c +++ b/src/static.c @@ -32,7 +32,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "page.c" // includes page-queue.c #include "random.c" #include "segment.c" -#include "segment-cache.c" #include "segment-map.c" #include "stats.c" #include "prim/prim.c" From 9535726528e2b58e736e7f816df2ba1984fd65ba Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 14 Apr 2023 09:42:04 -0700 Subject: [PATCH 041/102] fix warnings on latest macOS --- src/options.c | 2 +- test/test-api.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/options.c b/src/options.c index 33ea0a1f..cfb15aec 100644 --- a/src/options.c +++ b/src/options.c @@ -240,7 +240,7 @@ void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { } // add stderr to the delayed output after the module is loaded -static void mi_add_stderr_output() { +static void mi_add_stderr_output(void) { mi_assert_internal(mi_out_default == NULL); mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output diff --git a/test/test-api.c b/test/test-api.c index c78e1972..852e3e8a 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -286,7 +286,7 @@ int main(void) { // Larger test functions // --------------------------------------------------- -bool test_heap1() { +bool test_heap1(void) { mi_heap_t* heap = mi_heap_new(); int* p1 = mi_heap_malloc_tp(heap,int); int* p2 = mi_heap_malloc_tp(heap,int); @@ -295,7 +295,7 @@ bool test_heap1() { return true; } -bool test_heap2() { +bool test_heap2(void) { mi_heap_t* heap = mi_heap_new(); int* p1 = mi_heap_malloc_tp(heap,int); int* p2 = mi_heap_malloc_tp(heap,int); @@ -306,7 +306,7 @@ bool test_heap2() { return true; } -bool test_stl_allocator1() { +bool test_stl_allocator1(void) { #ifdef __cplusplus std::vector > vec; vec.push_back(1); @@ -319,7 +319,7 @@ bool test_stl_allocator1() { struct some_struct { int i; int j; double z; }; -bool test_stl_allocator2() { +bool test_stl_allocator2(void) { #ifdef __cplusplus std::vector > vec; vec.push_back(some_struct()); From 5a866ca7e1cf17c02d17699beffdba3d25c02714 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 14 Apr 2023 09:44:36 -0700 Subject: [PATCH 042/102] fix build error on Android/Linux for an atomic signature (pr #724, issue #729) --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index fe79fbca..6fb4951c 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -281,7 +281,7 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t; static inline bool mi_atomic_once( mi_atomic_once_t* once ) { if (mi_atomic_load_relaxed(once) != 0) return false; // quick test uintptr_t expected = 0; - return mi_atomic_cas_strong_acq_rel(once, &expected, 1); // try to set to 1 + return mi_atomic_cas_strong_acq_rel(once, &expected, 1UL); // try to set to 1 } // Yield From f30b302895302b8f0d64b947ee7009231d968e9c Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 14 Apr 2023 09:51:28 -0700 Subject: [PATCH 043/102] address concern #721 --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 1171a1ab..1f560c0a 100644 --- a/src/os.c +++ b/src/os.c @@ -217,8 +217,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { - mi_os_mem_free(p, size, commit, stats); _mi_warning_message("unable to allocate aligned OS 
memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); + mi_os_mem_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; From 462080a92e8df7ad718bdee69740914c8c2ac1d5 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 14 Apr 2023 10:02:01 -0700 Subject: [PATCH 044/102] add test for issue #602 --- test/test-api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test-api.c b/test/test-api.c index 852e3e8a..829d7d35 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -212,6 +212,11 @@ int main(void) { result = mi_heap_contains_block(heap, p); mi_heap_destroy(heap); } + CHECK_BODY("mimalloc-aligned12") { + void* p = mi_malloc_aligned(0x100, 0x100); + result = (((uintptr_t)p % 0x100) == 0); // #602 + mi_free(p); + } CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; From cd915900a23959947554e01bccc3d983eb8aa263 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Wed, 12 Apr 2023 01:37:48 +0800 Subject: [PATCH 045/102] atomic.h: unbreak build on Darwin PPC --- include/mimalloc/atomic.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 6fb4951c..1951b470 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -303,7 +303,7 @@ static inline void mi_atomic_yield(void) { } #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ - defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); @@ -316,10 +316,16 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } -#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) +#ifdef __APPLE__ +static inline void mi_atomic_yield(void) { + __asm__ volatile ("or r27,r27,r27" ::: "memory"); +} +#else static inline void mi_atomic_yield(void) { __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } +#endif #elif defined(__armel__) || defined(__ARMEL__) static inline void mi_atomic_yield(void) { __asm__ volatile ("nop" ::: "memory"); From 2647146abd24ca971c34939014b4dc84c12b4c3b Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Wed, 12 Apr 2023 01:48:08 +0800 Subject: [PATCH 046/102] prim.c: fix for macOS without MACH_TASK_BASIC_INFO --- src/prim/unix/prim.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 4efc10d3..c28c7759 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -610,11 +610,19 @@ void _mi_prim_process_info(mi_process_info_t* pinfo) pinfo->page_faults = 0; #elif defined(__APPLE__) pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes + #ifdef MACH_TASK_BASIC_INFO struct mach_task_basic_info info; mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) 
{ pinfo->current_rss = (size_t)info.resident_size; } + #else + struct task_basic_info info; + mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } + #endif #else pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB #endif From d106f0c116f6f1122357ac530e7a6d3230ab5547 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Wed, 12 Apr 2023 02:00:03 +0800 Subject: [PATCH 047/102] alloc-override-zone.c: fix for 10.6 rosetta/ppc --- src/prim/osx/alloc-override-zone.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/prim/osx/alloc-override-zone.c b/src/prim/osx/alloc-override-zone.c index 80bcfa93..0e0a99d9 100644 --- a/src/prim/osx/alloc-override-zone.c +++ b/src/prim/osx/alloc-override-zone.c @@ -195,7 +195,7 @@ static malloc_introspection_t mi_introspect = { .log = &intro_log, .force_lock = &intro_force_lock, .force_unlock = &intro_force_unlock, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) .statistics = &intro_statistics, .zone_locked = &intro_zone_locked, #endif @@ -216,7 +216,7 @@ static malloc_zone_t mi_malloc_zone = { .batch_malloc = &zone_batch_malloc, .batch_free = &zone_batch_free, .introspect = &mi_introspect, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .version = 10, #else From 0174d19af37a129ac9d9f5d9d5562ea411597fe9 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 15 Apr 2023 19:49:14 -0700 Subject: [PATCH 048/102] increase thread data cache to 16 --- include/mimalloc/internal.h | 10 ++++++---- src/arena.c | 18 +++++++++++------- src/heap.c | 10 ++++++---- src/init.c | 13 ++++++------- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 8b539876..91206cac 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -80,10 +80,11 @@ extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); -bool _mi_preloading(void); // true while the C runtime is not ready +bool _mi_preloading(void); // true while the C runtime is not initialized yet mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; -mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap void _mi_thread_done(mi_heap_t* heap); +void _mi_thread_data_collect(void); // os.c void _mi_os_init(void); // called from process init @@ -119,8 +120,9 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_o void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); bool 
_mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); -void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); +void _mi_arena_collect(bool force_purge, mi_stats_t* stats); +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); // "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); @@ -168,8 +170,8 @@ uint8_t _mi_bin(size_t size); // for stats void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); -void _mi_heap_destroy_all(void); bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); +void _mi_heap_unsafe_destroy_all(void); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/src/arena.c b/src/arena.c index 556e9cc6..131b24bf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -703,7 +703,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. -static void mi_arenas_destroy(void) { +static void mi_arenas_unsafe_destroy(void) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); size_t new_max_arena = 0; for (size_t i = 0; i < max_arena; i++) { @@ -730,15 +730,19 @@ static void mi_arenas_destroy(void) { mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); } - -void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats) { - if (free_arenas) { - mi_arenas_destroy(); - } - mi_arenas_try_purge(force_decommit, true, stats); +// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired +void _mi_arena_collect(bool force_purge, mi_stats_t* stats) { + mi_arenas_try_purge(force_purge, true /* visit all */, stats); } +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { + mi_arenas_unsafe_destroy(); + _mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas +} +// Is a pointer inside any of our arenas? 
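/* -----------------------------------------------------------
   Usage note for the split above: _mi_arena_collect now only purges
   amenable memory, while _mi_arena_unsafe_destroy_all (paired with
   _mi_heap_unsafe_destroy_all) is reserved for the opt-in teardown used by
   dynamic libraries that are unloaded and must give back everything. The
   small program below is an illustrative sketch only: it relies on the
   public option API, and setting the option through the environment
   (typically MIMALLOC_DESTROY_ON_EXIT=1) is the more common route.
----------------------------------------------------------- */
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // opt in early, before significant allocation has happened
  mi_option_set_enabled(mi_option_destroy_on_exit, true);

  void* p = mi_malloc(1024);
  printf("allocated %p\n", p);
  mi_free(p);

  // with the option enabled, shutdown forces a collect and then destroys the
  // owned arenas outright; without it, arenas are only purged and their
  // reservations stay in place
  return 0;
}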
bool _mi_arena_contains(const void* p) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); for (size_t i = 0; i < max_arena; i++) { diff --git a/src/heap.c b/src/heap.c index 52087234..18cfc706 100644 --- a/src/heap.c +++ b/src/heap.c @@ -151,14 +151,15 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect segment caches + // collect segment and thread caches if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } - // collect regions on program-exit (or shared library unload) + // collect arenas on program-exit (or shared library unload) if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - _mi_arena_collect(false /* destroy arenas */, true /* force purge */, &heap->tld->stats); + _mi_thread_data_collect(); // collect thread data cache + _mi_arena_collect(true /* force purge */, &heap->tld->stats); } } @@ -354,7 +355,8 @@ void mi_heap_destroy(mi_heap_t* heap) { } } -void _mi_heap_destroy_all(void) { +// forcefully destroy all heaps in the current thread +void _mi_heap_unsafe_destroy_all(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* curr = bheap->tld->heaps; while (curr != NULL) { diff --git a/src/init.c b/src/init.c index 8896e138..8ab0f943 100644 --- a/src/init.c +++ b/src/init.c @@ -177,7 +177,6 @@ mi_heap_t* _mi_heap_main_get(void) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; - mi_memid_t memid; } mi_thread_data_t; @@ -186,7 +185,7 @@ typedef struct mi_thread_data_s { // destroy many OS threads, this may causes too much overhead // per thread so we maintain a small cache of recently freed metadata. -#define TD_CACHE_SIZE (8) +#define TD_CACHE_SIZE (16) static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; static mi_thread_data_t* mi_thread_data_zalloc(void) { @@ -238,7 +237,7 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) { _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); } -static void mi_thread_data_collect(void) { +void _mi_thread_data_collect(void) { // free all thread metadata from the cache for (int i = 0; i < TD_CACHE_SIZE; i++) { mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); @@ -323,7 +322,6 @@ static bool _mi_heap_done(mi_heap_t* heap) { mi_thread_data_free((mi_thread_data_t*)heap); } else { - mi_thread_data_collect(); // free cached thread metadata #if 0 // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, // there may still be delete/free calls after the mi_fls_done is called. Issue #207 @@ -589,7 +587,7 @@ static void mi_cdecl mi_process_done(void) { _mi_prim_thread_done_auto_done(); #ifndef MI_SKIP_COLLECT_ON_EXIT - #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) + #if (MI_DEBUG || !defined(MI_SHARED_LIB)) // free all memory if possible on process exit. This is not needed for a stand-alone process // but should be done if mimalloc is statically linked into another shared library which // is repeatedly loaded/unloaded, see issue #281. @@ -601,8 +599,9 @@ static void mi_cdecl mi_process_done(void) { // since after process_done there might still be other code running that calls `free` (like at_exit routines, // or C-runtime termination code. 
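/* -----------------------------------------------------------
   The thread-data cache touched above (now 16 entries) avoids an OS
   allocation and free for every short-lived thread: freed thread metadata
   blocks are parked in a small array of atomic slots and handed out again
   with an atomic exchange. The standalone sketch below (demo_* names are
   illustrative, not mimalloc code) shows the pattern with C11 atomics,
   with calloc/free standing in for the OS allocation.
----------------------------------------------------------- */
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_TD_CACHE_SIZE 16

typedef struct demo_td_s { char payload[256]; } demo_td_t;

static _Atomic(demo_td_t*) demo_td_cache[DEMO_TD_CACHE_SIZE];

static demo_td_t* demo_td_alloc(void) {
  // first try to pop a cached block; the exchange makes the claim race-free
  for (int i = 0; i < DEMO_TD_CACHE_SIZE; i++) {
    if (atomic_load_explicit(&demo_td_cache[i], memory_order_relaxed) != NULL) {
      demo_td_t* td = atomic_exchange_explicit(&demo_td_cache[i], NULL, memory_order_acq_rel);
      if (td != NULL) {
        memset(td, 0, sizeof(*td));   // a reused block is no longer zero initialized
        return td;
      }
    }
  }
  return (demo_td_t*)calloc(1, sizeof(demo_td_t));  // cache miss: allocate fresh
}

static void demo_td_free(demo_td_t* td) {
  // park the block in an empty slot; only one thread can win each CAS
  for (int i = 0; i < DEMO_TD_CACHE_SIZE; i++) {
    demo_td_t* expected = NULL;
    if (atomic_compare_exchange_strong_explicit(&demo_td_cache[i], &expected, td,
                                                memory_order_acq_rel, memory_order_relaxed)) {
      return;
    }
  }
  free(td);  // the cache is full: release it for real
}

int main(void) {
  demo_td_t* td = demo_td_alloc();
  demo_td_free(td);                   // parked in the cache
  demo_td_t* td2 = demo_td_alloc();   // gets the cached block back
  free(td2);
  return 0;
}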
if (mi_option_is_enabled(mi_option_destroy_on_exit)) { - _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_arena_collect(true /* destroy (owned) arenas */, true /* purge the rest */, &_mi_heap_main_get()->tld->stats); + mi_collect(true /* force */); + _mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) + _mi_arena_unsafe_destroy_all(& _mi_heap_main_get()->tld->stats); } if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { From 0fc4de14406f046318b99b3ced09e71425dfb356 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 15 Apr 2023 21:31:17 -0700 Subject: [PATCH 049/102] use rich memid's to simplify the internal API's and invariants --- include/mimalloc/internal.h | 6 +- include/mimalloc/types.h | 43 +++++--- src/arena.c | 198 +++++++++++++++++++----------------- src/segment.c | 79 +++++++------- 4 files changed, 171 insertions(+), 155 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 91206cac..e346f120 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -116,9 +116,9 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, mi_memid_t memid, size_t committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); bool _mi_arena_contains(const void* p); void _mi_arena_collect(bool force_purge, mi_stats_t* stats); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 90c00279..18201ccf 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -322,16 +322,33 @@ typedef enum mi_page_kind_e { // Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. 
typedef enum mi_memkind_e { - MI_MEM_NONE, - MI_MEM_OS, - MI_MEM_STATIC, - MI_MEM_ARENA + MI_MEM_NONE, // not allocated + MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) + MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) + MI_MEM_OS, // allocated from the OS + MI_MEM_ARENA // allocated from an arena (the usual case) } mi_memkind_t; +typedef struct mi_memid_os_info { + size_t alignment; // allocated with the given alignment + size_t align_offset; // the offset that was aligned (used only for huge aligned pages) +} mi_memid_os_info_t; + +typedef struct mi_memid_arena_info { + size_t block_index; // index in the arena + mi_arena_id_t id; // arena id (>= 1) + bool is_exclusive; // the arena can only be used for specific arena allocations +} mi_memid_arena_info_t; + typedef struct mi_memid_s { - size_t arena_idx; - mi_arena_id_t arena_id; - bool arena_is_exclusive; + union { + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena;// only used for MI_MEM_ARENA + } mem; + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) + bool is_large; // `true` if the memory is in OS large (2MiB) or huge (1GiB) pages. (`is_pinned` will be true) + bool was_committed; // `true` if the memory was originally allocated as committed + bool was_zero; // `true` if the memory was originally zero initialized mi_memkind_t memkind; } mi_memid_t; @@ -340,17 +357,12 @@ typedef struct mi_memid_s { // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - // memory fields + // constant fields mi_memid_t memid; // id for the os-level memory manager - bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) - bool mem_is_large; // `true` if the memory is in OS large or huge pages. (`is_pinned` will be true) - bool mem_is_committed; // `true` if the whole segment is eagerly committed - size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) - size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) - bool allow_decommit; bool allow_purge; - + size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` + // segment fields _Atomic(struct mi_segment_s*) abandoned_next; struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init` @@ -361,7 +373,6 @@ typedef struct mi_segment_s { size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) - size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages. 
uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` diff --git a/src/arena.c b/src/arena.c index 131b24bf..33c6e4a4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -46,7 +46,7 @@ typedef struct mi_arena_s { _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) - size_t meta_size; // size of the arena structure itself including the bitmaps + size_t meta_size; // size of the arena structure itself (including its bitmaps) mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? @@ -96,55 +96,42 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus memory id's ----------------------------------------------------------- */ -static mi_memid_t mi_arena_memid_none(void) { +static mi_memid_t mi_memid_none(void) { mi_memid_t memid; + _mi_memzero(&memid, sizeof(memid)); memid.memkind = MI_MEM_NONE; - memid.arena_id = 0; - memid.arena_idx = 0; - memid.arena_is_exclusive = false; return memid; } -static mi_memid_t mi_arena_memid_os(void) { - mi_memid_t memid = mi_arena_memid_none(); +static mi_memid_t mi_memid_create(mi_memkind_t memkind) { + mi_memid_t memid = mi_memid_none(); + memid.memkind = memkind; + return memid; +} + +static mi_memid_t mi_memid_create_os(bool committed) { + mi_memid_t memid = mi_memid_none(); memid.memkind = MI_MEM_OS; + memid.was_committed = committed; return memid; } -static mi_memid_t mi_arena_memid_static(void) { - mi_memid_t memid = mi_arena_memid_none(); - memid.memkind = MI_MEM_STATIC; - return memid; -} - - bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { - // note: works also for OS and STATIC memory with a zero arena_id. - return mi_arena_id_is_suitable(memid.arena_id, memid.arena_is_exclusive, request_arena_id); + if (memid.memkind == MI_MEM_ARENA) { + return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); + } + else { + return mi_arena_id_is_suitable(0, false, request_arena_id); + } } + /* ----------------------------------------------------------- Arena allocations get a (currently) 16-bit memory id where the lower 8 bits are the arena id, and the upper bits the block index. 
----------------------------------------------------------- */ -static mi_memid_t mi_arena_memid_create(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { - mi_memid_t memid; - memid.memkind = MI_MEM_ARENA; - memid.arena_id = id; - memid.arena_idx = bitmap_index; - memid.arena_is_exclusive = is_exclusive; - return memid; -} - -static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - mi_assert_internal(memid.memkind == MI_MEM_ARENA); - *arena_index = mi_arena_id_index(memid.arena_id); - *bitmap_index = memid.arena_idx; - return memid.arena_is_exclusive; -} - static size_t mi_block_count_of_size(size_t size) { return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } @@ -157,6 +144,22 @@ static size_t mi_arena_size(mi_arena_t* arena) { return mi_arena_block_size(arena->block_count); } +static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { + mi_memid_t memid = mi_memid_create(MI_MEM_ARENA); + memid.mem.arena.id = id; + memid.mem.arena.block_index = bitmap_index; + memid.mem.arena.is_exclusive = is_exclusive; + return memid; +} + +static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { + mi_assert_internal(memid.memkind == MI_MEM_ARENA); + *arena_index = mi_arena_id_index(memid.mem.arena.id); + *bitmap_index = memid.mem.arena.block_index; + return memid.mem.arena.is_exclusive; +} + + /* ----------------------------------------------------------- Special static area for mimalloc internal structures @@ -170,7 +173,7 @@ static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; static _Atomic(size_t) mi_arena_static_top; static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { - *memid = mi_arena_memid_static(); + *memid = mi_memid_none(); if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; if (mi_atomic_load_relaxed(&mi_arena_static_top) >= MI_ARENA_STATIC_MAX) return NULL; @@ -187,7 +190,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m } // success - *memid = mi_arena_memid_static(); + *memid = mi_memid_create(MI_MEM_STATIC); const size_t start = _mi_align_up(oldtop, alignment); uint8_t* const p = &mi_arena_static[start]; _mi_memzero(p, size); @@ -195,20 +198,17 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m } static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { - *memid = mi_arena_memid_none(); + *memid = mi_memid_none(); // try static void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid); - if (p != NULL) { - *memid = mi_arena_memid_static(); - return p; - } + if (p != NULL) return p; // or fall back to the OS bool is_zero = false; p = _mi_os_alloc(size, &is_zero, stats); if (p != NULL) { - *memid = mi_arena_memid_os(); + *memid = mi_memid_create_os(true); if (!is_zero) { _mi_memzero(p, size); } return p; } @@ -216,7 +216,7 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st return NULL; } -static void mi_arena_meta_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) { +static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) { if (memid.memkind == MI_MEM_OS) { _mi_os_free(p, size, stats); } @@ -244,8 +244,7 @@ static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index ----------------------------------------------------------- */ static mi_decl_noinline void* 
mi_arena_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) + bool commit, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { MI_UNUSED(arena_index); mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); @@ -255,10 +254,10 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_ if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! - void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); - *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); - *large = arena->is_large; - *is_pinned = (arena->is_large || !arena->allow_decommit); + void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); + *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); + memid->is_large = arena->is_large; + memid->is_pinned = (arena->is_large || !arena->allow_decommit); // none of the claimed blocks should be scheduled for a decommit if (arena->blocks_purge != NULL) { @@ -267,26 +266,31 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_ } // set the dirty bits (todo: no need for an atomic op here?) - *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + memid->was_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); // set commit state if (arena->blocks_committed == NULL) { // always committed - *commit = true; + memid->was_committed = true; } - else if (*commit) { + else if (commit) { // commit requested, but the range may not be committed as a whole: ensure it is committed now + memid->was_committed = true; bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { - bool commit_zero; - _mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats); - if (commit_zero) { *is_zero = true; } + bool commit_zero = false; + if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) { + memid->was_committed = false; + } + else { + if (commit_zero) { memid->was_zero = true; } + } } } else { // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + memid->was_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } // mi_track_mem_undefined(p,mi_arena_block_size(needed_bcount)); @@ -295,7 +299,7 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_ // allocate in a speficic arena static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, int numa_node, size_t size, size_t alignment, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED_RELEASE(alignment); @@ -310,14 +314,14 @@ static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, int numa_node, size_t mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; if (arena->numa_node >= 0 && arena->numa_node != numa_node) return NULL; - if (!(*large) && arena->is_large) return NULL; - 
return mi_arena_alloc_at(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + if (!allow_large && arena->is_large) return NULL; + return mi_arena_alloc_at(arena, arena_index, bcount, commit, req_arena_id, memid, tld); } // allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, - bool* is_pinned, bool* is_zero, +static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED(alignment); @@ -333,9 +337,9 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if ((arena != NULL) && // (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages + (allow_large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { - void* p = mi_arena_alloc_at(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arena_alloc_at(arena, arena_index, bcount, commit, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -346,9 +350,9 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL && (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages + (allow_large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { - void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arena_alloc_at(arena, i, bcount, commit, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -359,9 +363,9 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL && (arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! 
- (*large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages + (allow_large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages { - void* p = mi_arena_alloc_at(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arena_alloc_at(arena, i, bcount, commit, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -400,48 +404,53 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = mi_arena_memid_none(); - *is_zero = false; - *is_pinned = false; + *memid = mi_memid_none(); - bool default_large = false; - if (large == NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - void* p = mi_arenas_alloc(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + void* p = mi_arenas_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; // otherwise, try to first eagerly reserve a new arena mi_arena_id_t arena_id = 0; - if (mi_arena_reserve(size,*large,req_arena_id,&arena_id)) { + if (mi_arena_reserve(size,allow_large,req_arena_id,&arena_id)) { // and try allocate in there - p = mi_arena_alloc_at_id(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + p = mi_arena_alloc_at_id(arena_id, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } - // finally, fall back to the OS + // if we cannot use OS allocation, return NULL if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } - - *memid = mi_arena_memid_os(); - void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, is_zero, tld->stats); - if (p != NULL) { *is_pinned = *large; } + + // finally, fall back to the OS + bool os_large = allow_large; + bool os_is_zero = false; + void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, commit, &os_large, &os_is_zero, tld->stats); + if (p != NULL) { + *memid = mi_memid_create_os(commit); + memid->is_large = os_large; + memid->is_pinned = os_large; + memid->was_zero = os_is_zero; + memid->mem.os.alignment = alignment; + memid->mem.os.align_offset = align_offset; + } return p; } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, 
MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld); } @@ -621,27 +630,28 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, mi_memid_t memid, size_t committed_size, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); mi_assert_internal(committed_size <= size); if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); - if (memid.memkind == MI_MEM_STATIC) { - // nothing to do - } - else if (memid.memkind == MI_MEM_OS) { + if (memid.memkind == MI_MEM_OS) { // was a direct OS allocation, pass through if (!all_committed && committed_size > 0) { // if partially committed, adjust the committed stats _mi_stat_decrease(&stats->committed, committed_size); } - _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); + if (memid.mem.os.align_offset != 0) { + _mi_os_free_aligned(p, size, memid.mem.os.alignment, memid.mem.os.align_offset, all_committed, stats); + } + else { + _mi_os_free(p, size, stats); + } } - else { + else if (memid.memkind == MI_MEM_ARENA) { // allocated in an arena - mi_assert_internal(align_offset == 0); size_t arena_idx; size_t bitmap_idx; mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); @@ -696,6 +706,10 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, return; }; } + else { + // arena was none, external, or static; nothing to do + mi_assert_internal(memid.memkind <= MI_MEM_STATIC); + } // purge expired decommits mi_arenas_try_purge(false, false, stats); @@ -716,12 +730,12 @@ static void mi_arenas_unsafe_destroy(void) { } else { _mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main); - } - mi_arena_meta_free(arena, arena->meta_size, arena->meta_memid, &_mi_stats_main); + } } else { new_max_arena = i; } + mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size, &_mi_stats_main); } } diff --git a/src/segment.c b/src/segment.c index b8651caf..9485a55a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -201,7 +201,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* // and protect the last (or only) page too mi_assert_internal(MI_SECURE <= 1 || segment->page_kind >= MI_PAGE_LARGE); uint8_t* start = (uint8_t*)segment + segment->segment_size - os_psize; - if (protect && !segment->mem_is_committed) { + if (protect && !segment->memid.was_committed) { if (protect) { // ensure secure page is committed if (_mi_os_commit(start, os_psize, NULL, tld->stats)) { // if this fails that is ok (as it is an unaccessible page) @@ -331,7 +331,7 @@ static void mi_page_purge_remove(mi_page_t* page, mi_segments_tld_t* tld) { } static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge, mi_segments_tld_t* tld) { - if (segment->mem_is_pinned) return; // never reset in huge OS pages + if (segment->memid.is_pinned) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { @@ -474,23 +474,23 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se 
_mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { - mi_assert_internal(!segment->mem_is_pinned); + mi_assert_internal(!segment->memid.is_pinned); mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } bool fully_committed = true; - size_t committed = 0; + size_t committed_size = 0; const size_t page_size = mi_segment_raw_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (page->is_committed) { committed += page_size; } + if (page->is_committed) { committed_size += page_size; } if (!page->is_committed) { fully_committed = false; } } MI_UNUSED(fully_committed); - mi_assert_internal((fully_committed && committed == segment_size) || (!fully_committed && committed < segment_size)); + mi_assert_internal((fully_committed && committed_size == segment_size) || (!fully_committed && committed_size < segment_size)); _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged) - _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, committed, tld->stats); + _mi_arena_free(segment, segment_size, committed_size, segment->memid, tld->stats); } // called by threads that are terminating to free cached segments @@ -509,47 +509,42 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Segment allocation ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, mi_arena_id_t req_arena_id, - size_t pre_size, size_t info_size, - size_t* segment_size, bool* is_zero, bool* commit, mi_segments_tld_t* tld, mi_os_tld_t* tld_os) +static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, mi_arena_id_t req_arena_id, + size_t pre_size, size_t info_size, bool commit, size_t segment_size, + mi_segments_tld_t* tld, mi_os_tld_t* tld_os) { mi_memid_t memid; - bool mem_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy - bool is_pinned = false; + bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy size_t align_offset = 0; size_t alignment = MI_SEGMENT_SIZE; if (page_alignment > 0) { alignment = page_alignment; align_offset = _mi_align_up(pre_size, MI_SEGMENT_SIZE); - *segment_size = *segment_size + (align_offset - pre_size); + segment_size = segment_size + (align_offset - pre_size); // adjust the segment size } - // mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); - mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, tld_os); - if (segment == NULL) return NULL; // failed to allocate - if (!(*commit)) { + mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, commit, allow_large, req_arena_id, &memid, tld_os); + if (segment == NULL) { + return NULL; // failed to allocate + } + + if (!memid.was_committed) { // ensure the initial info is committed - mi_assert_internal(!mem_large && !is_pinned); - bool commit_zero = false; - bool ok = _mi_os_commit(segment, pre_size, &commit_zero, tld_os->stats); - if (commit_zero) { *is_zero = true; } + mi_assert_internal(!memid.is_large && 
!memid.is_pinned); + bool ok = _mi_os_commit(segment, pre_size, NULL, tld_os->stats); if (!ok) { // commit failed; we cannot touch the memory: free the segment directly and return `NULL` - _mi_arena_free(segment, *segment_size, alignment, align_offset, memid, false, tld_os->stats); + _mi_arena_free(segment, segment_size, 0, memid, tld_os->stats); return NULL; } } - mi_track_mem_undefined(segment, info_size); MI_UNUSED(info_size); + mi_track_mem_undefined(segment, info_size); MI_UNUSED(info_size); segment->memid = memid; - segment->mem_is_pinned = is_pinned; - segment->mem_is_large = mem_large; - segment->mem_is_committed = commit; - segment->mem_alignment = alignment; - segment->mem_align_offset = align_offset; - segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; + segment->allow_decommit = !memid.is_pinned && !memid.is_large; segment->allow_purge = segment->allow_decommit && mi_option_is_enabled(mi_option_allow_purge); - mi_segments_track_size((long)(*segment_size), tld); + segment->segment_size = segment_size; + mi_segments_track_size((long)(segment_size), tld); _mi_segment_map_allocated_at(segment); return segment; } @@ -576,8 +571,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } size_t info_size; size_t pre_size; - size_t segment_size = mi_segment_calculate_sizes(capacity, required, &pre_size, &info_size); - mi_assert_internal(segment_size >= required); + const size_t init_segment_size = mi_segment_calculate_sizes(capacity, required, &pre_size, &info_size); + mi_assert_internal(init_segment_size >= required); // Initialize parameters const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && // don't delay for large objects @@ -585,39 +580,36 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_current_thread_count() > 1 && // do not delay for the first N threads tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); - bool is_zero = false; - + const bool init_commit = eager; // || (page_kind >= MI_PAGE_LARGE); + // Allocate the segment from the OS (segment_size can change due to alignment) - mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, &segment_size, &is_zero, &commit, tld, os_tld); + mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, init_commit, init_segment_size, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - mi_assert_internal(segment->mem_is_pinned ? segment->mem_is_committed : true); + mi_assert_internal(segment->memid.is_pinned ? 
segment->memid.was_committed : true); mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); - memset((uint8_t*)segment + ofs, 0, info_size - ofs); + _mi_memzero((uint8_t*)segment + ofs, info_size - ofs); // initialize pages info for (size_t i = 0; i < capacity; i++) { mi_assert_internal(i <= 255); segment->pages[i].segment_idx = (uint8_t)i; - segment->pages[i].is_committed = commit; - segment->pages[i].is_zero_init = is_zero; + segment->pages[i].is_committed = segment->memid.was_committed; + segment->pages[i].is_zero_init = segment->memid.was_zero; } // initialize segment->page_kind = page_kind; segment->capacity = capacity; segment->page_shift = page_shift; - segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); - + // set protection mi_segment_protect(segment, true, tld->os); @@ -626,7 +618,6 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_segment_insert_in_free_queue(segment, tld); } - //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } From d48b988c405601b5594be7db20177e9d7e223130 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 16 Apr 2023 11:21:45 -0700 Subject: [PATCH 050/102] clean up os api --- include/mimalloc/internal.h | 6 +++--- include/mimalloc/prim.h | 4 ++-- include/mimalloc/types.h | 5 ++++- src/arena.c | 22 ++++++++++------------ src/os.c | 26 +++++++++++--------------- src/segment.c | 4 ++-- 6 files changed, 32 insertions(+), 35 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e346f120..fa932cda 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -103,9 +103,9 @@ bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats); -void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats); +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats); +void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 094d7ab9..40f5d2d7 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -26,8 +26,8 @@ typedef struct mi_os_mem_config_s { size_t large_page_size; // 2MiB size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) bool has_overcommit; // can we reserve more memory than can be 
actually committed? - bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc) - bool has_virtual_reserve; // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory) + bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) + bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) } mi_os_mem_config_t; // Initialize diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 18201ccf..2a5d172d 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -312,6 +312,10 @@ typedef struct mi_page_s { +// ------------------------------------------------------ +// Mimalloc segments contain mimalloc pages +// ------------------------------------------------------ + typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment @@ -346,7 +350,6 @@ typedef struct mi_memid_s { mi_memid_arena_info_t arena;// only used for MI_MEM_ARENA } mem; bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) - bool is_large; // `true` if the memory is in OS large (2MiB) or huge (1GiB) pages. (`is_pinned` will be true) bool was_committed; // `true` if the memory was originally allocated as committed bool was_zero; // `true` if the memory was originally zero initialized mi_memkind_t memkind; diff --git a/src/arena.c b/src/arena.c index 33c6e4a4..473bdc55 100644 --- a/src/arena.c +++ b/src/arena.c @@ -50,9 +50,9 @@ typedef struct mi_arena_s { mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? - bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL bool is_large; // large- or huge OS pages (always committed) bool is_huge_alloc; // huge OS pages allocated by `_mi_os_alloc_huge_pages` + bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL _Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? @@ -256,7 +256,6 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_ // claimed it! 
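// the claimed bitmap range determines the block address; the memid returned below records the arena id, the block index, the pinned state, and the commit/zero state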
void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); - memid->is_large = arena->is_large; memid->is_pinned = (arena->is_large || !arena->allow_decommit); // none of the claimed blocks should be scheduled for a decommit @@ -434,13 +433,12 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset } // finally, fall back to the OS - bool os_large = allow_large; + bool os_is_large = false; bool os_is_zero = false; - void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, commit, &os_large, &os_is_zero, tld->stats); + void* p = _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, &os_is_large, &os_is_zero, tld->stats); if (p != NULL) { *memid = mi_memid_create_os(commit); - memid->is_large = os_large; - memid->is_pinned = os_large; + memid->is_pinned = os_is_large; memid->was_zero = os_is_zero; memid->mem.os.alignment = alignment; memid->mem.os.align_offset = align_offset; @@ -644,7 +642,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi _mi_stat_decrease(&stats->committed, committed_size); } if (memid.mem.os.align_offset != 0) { - _mi_os_free_aligned(p, size, memid.mem.os.alignment, memid.mem.os.align_offset, all_committed, stats); + _mi_os_free_aligned_at_offset(p, size, memid.mem.os.alignment, memid.mem.os.align_offset, all_committed, stats); } else { _mi_os_free(p, size, stats); @@ -857,16 +855,16 @@ static int mi_reserve_os_memory_ex2(size_t size, bool commit, bool allow_large, { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block - bool large = allow_large; - bool is_zero; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &is_zero, &_mi_stats_main); + bool is_large = false; + bool is_zero = false; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &is_large, &is_zero, &_mi_stats_main); if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory_ex2(start, size, (large || commit), large, false, is_zero, -1, exclusive, owned, arena_id)) { + if (!mi_manage_os_memory_ex2(start, size, (is_large || commit), is_large, false, is_zero, -1, exclusive, owned, arena_id)) { _mi_os_free_ex(start, size, commit, &_mi_stats_main); _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); return ENOMEM; } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : ""); + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), is_large ? 
" (in large os pages)" : ""); return 0; } diff --git a/src/os.c b/src/os.c index d657d5d5..cffeed29 100644 --- a/src/os.c +++ b/src/os.c @@ -275,23 +275,19 @@ void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* tld_stats) { return p; } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); - bool allow_large = false; - if (large != NULL) { - allow_large = *large; - *large = false; - } - bool is_largex = false; - bool is_zerox = false; - void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &is_largex, &is_zerox, &_mi_stats_main /*tld->stats*/ ); - if (large != NULL) { *large = is_largex; } - if (is_zero != NULL) { *is_zero = is_zerox; } + + bool os_is_large = false; + bool os_is_zero = false; + void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ ); + if (is_large != NULL) { *is_large = os_is_large; } + if (is_zero != NULL) { *is_zero = os_is_zero; } return p; } @@ -303,20 +299,20 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar to use the actual start of the memory region. ----------------------------------------------------------- */ -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats) { +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) { mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); if (offset > MI_SEGMENT_SIZE) return NULL; if (offset == 0) { // regular aligned allocation - return _mi_os_alloc_aligned(size, alignment, commit, large, is_zero, tld_stats); + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, is_large, is_zero, tld_stats); } else { // overallocate to align at an offset const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t oversize = size + extra; - void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, is_zero, tld_stats); + void* start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, is_large, is_zero, tld_stats); if (start == NULL) return NULL; void* p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); @@ -328,7 +324,7 @@ void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, } } -void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { +void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { mi_assert(align_offset <= MI_SEGMENT_SIZE); const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; void* start = (uint8_t*)p - extra; diff --git a/src/segment.c b/src/segment.c index 9485a55a..41c28065 100644 --- a/src/segment.c +++ b/src/segment.c @@ -530,7 +530,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme if 
(!memid.was_committed) { // ensure the initial info is committed - mi_assert_internal(!memid.is_large && !memid.is_pinned); + mi_assert_internal(!memid.is_pinned); bool ok = _mi_os_commit(segment, pre_size, NULL, tld_os->stats); if (!ok) { // commit failed; we cannot touch the memory: free the segment directly and return `NULL` @@ -541,7 +541,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme mi_track_mem_undefined(segment, info_size); MI_UNUSED(info_size); segment->memid = memid; - segment->allow_decommit = !memid.is_pinned && !memid.is_large; + segment->allow_decommit = !memid.is_pinned; segment->allow_purge = segment->allow_decommit && mi_option_is_enabled(mi_option_allow_purge); segment->segment_size = segment_size; mi_segments_track_size((long)(segment_size), tld); From a655c28b6664b059cee3d30b89771d5b53004800 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 16 Apr 2023 12:29:21 -0700 Subject: [PATCH 051/102] cleanup --- include/mimalloc/internal.h | 12 ++++++++---- src/arena.c | 28 ++++++++++++---------------- src/init.c | 2 +- src/os.c | 2 +- src/stats.c | 2 +- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index fa932cda..e0bcfaea 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -88,8 +88,10 @@ void _mi_thread_data_collect(void); // os.c void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats); // to allocate thread local data -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data +void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats); +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool is_committed, mi_stats_t* stats); + size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); @@ -106,13 +108,13 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* s void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats); void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); + void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, bool* is_zero); -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); +void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); // arena.c mi_arena_id_t _mi_arena_id_none(void); @@ -838,6 +840,8 @@ static inline void _mi_memzero(void* dst, size_t n) { } #endif +// initialize a local variable to zero; use memset as compilers optimize constant sized memset's +#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) // ------------------------------------------------------------------------------- // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned diff --git a/src/arena.c b/src/arena.c index 
473bdc55..f9b844bc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -96,22 +96,19 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus memory id's ----------------------------------------------------------- */ -static mi_memid_t mi_memid_none(void) { - mi_memid_t memid; - _mi_memzero(&memid, sizeof(memid)); - memid.memkind = MI_MEM_NONE; - return memid; -} - static mi_memid_t mi_memid_create(mi_memkind_t memkind) { - mi_memid_t memid = mi_memid_none(); + mi_memid_t memid; + _mi_memzero_var(memid); memid.memkind = memkind; return memid; } +static mi_memid_t mi_memid_none(void) { + return mi_memid_create(MI_MEM_NONE); +} + static mi_memid_t mi_memid_create_os(bool committed) { - mi_memid_t memid = mi_memid_none(); - memid.memkind = MI_MEM_OS; + mi_memid_t memid = mi_memid_create(MI_MEM_OS); memid.was_committed = committed; return memid; } @@ -163,11 +160,10 @@ static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bit /* ----------------------------------------------------------- Special static area for mimalloc internal structures - to avoid OS calls (for example, for the arena and thread - metadata) + to avoid OS calls (for example, for the arena metadata) ----------------------------------------------------------- */ -#define MI_ARENA_STATIC_MAX (MI_INTPTR_SIZE*8*MI_KiB) // 64 KiB on 64-bit +#define MI_ARENA_STATIC_MAX (MI_INTPTR_SIZE*MI_KiB) // 8 KiB on 64-bit static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; static _Atomic(size_t) mi_arena_static_top; @@ -209,7 +205,7 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st p = _mi_os_alloc(size, &is_zero, stats); if (p != NULL) { *memid = mi_memid_create_os(true); - if (!is_zero) { _mi_memzero(p, size); } + if (!is_zero) { _mi_memzero_aligned(p, size); } return p; } @@ -724,7 +720,7 @@ static void mi_arenas_unsafe_destroy(void) { if (arena->owned && arena->start != NULL) { mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); if (arena->is_huge_alloc) { - _mi_os_free_huge_pages(arena->start, mi_arena_size(arena), &_mi_stats_main); + _mi_os_free_huge_os_pages(arena->start, mi_arena_size(arena), &_mi_stats_main); } else { _mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main); @@ -938,7 +934,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); if (!mi_manage_os_memory_ex2(p, hsize, true, true, true, is_zero, numa_node, exclusive, true /* owned */, arena_id)) { - _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); + _mi_os_free_huge_os_pages(p, hsize, &_mi_stats_main); return ENOMEM; } return 0; diff --git a/src/init.c b/src/init.c index 8ab0f943..6dc61c48 100644 --- a/src/init.c +++ b/src/init.c @@ -217,7 +217,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { } if (td != NULL && !is_zero) { - _mi_memzero(td, sizeof(*td)); + _mi_memzero_aligned(td, sizeof(*td)); } return td; } diff --git a/src/os.c b/src/os.c index cffeed29..cf53472f 100644 --- a/src/os.c +++ b/src/os.c @@ -599,7 +599,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. 
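For context, the huge pages freed by the helper below were reserved up front through the public reservation API; a small usage sketch (the page count, NUMA node, and timeout are arbitrary values):

  // reserve 4 x 1GiB huge OS pages on NUMA node 0, waiting at most 2 seconds
  if (mi_reserve_huge_os_pages_at(4, 0, 2000) != 0) {
    // reservation failed; an errno-style code (e.g. ENOMEM) is returned
  }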
-void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { +void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { diff --git a/src/stats.c b/src/stats.c index 448248c7..a8eac648 100644 --- a/src/stats.c +++ b/src/stats.c @@ -444,7 +444,7 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { mi_process_info_t pinfo; - _mi_memzero(&pinfo,sizeof(pinfo)); + _mi_memzero_var(pinfo); pinfo.elapsed = _mi_clock_end(mi_process_start); pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); From 4a52b7e4c4cb85cc62552eb73822be3ce95d4b8d Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 16 Apr 2023 12:32:43 -0700 Subject: [PATCH 052/102] use memzero_aligned --- src/alloc-aligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 674c74fe..7af194bd 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -79,7 +79,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* // for the tracker, on huge aligned allocations only from the start of the large block is defined mi_track_mem_undefined(aligned_p, size); if (zero) { - _mi_memzero(aligned_p, mi_usable_size(aligned_p)); + _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); } } From 89e092d795d283c5924aa7dee089f1c934e5944d Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 16 Apr 2023 16:43:11 -0700 Subject: [PATCH 053/102] increase retire cycles to 16 --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index b1de1612..3445e504 100644 --- a/src/page.c +++ b/src/page.c @@ -418,7 +418,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { } #define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX -#define MI_RETIRE_CYCLES (8) +#define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks // Important to not retire too quickly though as new From a8b9f2878ae25b5313ac9d38df52dc2ec2ecd7cb Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 16 Apr 2023 17:01:59 -0700 Subject: [PATCH 054/102] better alignment test --- src/alloc-aligned.c | 12 ++---------- src/alloc-posix.c | 4 ++-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 7af194bd..65b28748 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -93,21 +93,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. 
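That guarantee is what the public offset-aligned entry points rely on; a brief usage sketch (the sizes chosen here are arbitrary):

  // request 200 bytes such that the address at byte offset 16 is 64-byte aligned
  void* p = mi_malloc_aligned_at(200, 64, 16);
  if (p != NULL) {
    // ((uintptr_t)p + 16) % 64 == 0 holds, independent of the usable size
    mi_free(p);
  }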
- mi_assert(alignment > 0); if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } - /* - if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); - #endif - return NULL; - } - */ + if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); @@ -147,9 +139,9 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he } mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + if (alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if (!_mi_is_power_of_two(alignment)) return NULL; if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments diff --git a/src/alloc-posix.c b/src/alloc-posix.c index b6f09d1a..0472f876 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -56,8 +56,8 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept // Note: The spec dictates we should not modify `*p` on an error. 
(issue#27) // if (p == NULL) return EINVAL; - if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment - if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 + if ((alignment % sizeof(void*)) != 0) return EINVAL; // natural alignment + // it is also required that alignment is a power of 2 and > 0; this is checked in `mi_malloc_aligned` void* q = mi_malloc_aligned(size, alignment); if (q==NULL && size != 0) return ENOMEM; mi_assert_internal(((uintptr_t)q % alignment) == 0); From 4436fadd09146b378032c2b7d1962c0bd38ba96e Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 16 Apr 2023 17:05:56 -0700 Subject: [PATCH 055/102] update alignment check --- src/alloc-posix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 0472f876..225752fd 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -58,6 +58,7 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept if (p == NULL) return EINVAL; if ((alignment % sizeof(void*)) != 0) return EINVAL; // natural alignment // it is also required that alignment is a power of 2 and > 0; this is checked in `mi_malloc_aligned` + if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 void* q = mi_malloc_aligned(size, alignment); if (q==NULL && size != 0) return ENOMEM; mi_assert_internal(((uintptr_t)q % alignment) == 0); From a52ba65f656d5ce7b19fad3943de0d04d7110d37 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 09:13:08 -0700 Subject: [PATCH 056/102] further fix for atomic build error suggested by Davide Di Gennaro (issue #729, pr #724) --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 1951b470..130ef820 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -281,7 +281,7 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t; static inline bool mi_atomic_once( mi_atomic_once_t* once ) { if (mi_atomic_load_relaxed(once) != 0) return false; // quick test uintptr_t expected = 0; - return mi_atomic_cas_strong_acq_rel(once, &expected, 1UL); // try to set to 1 + return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 } // Yield From 43be9c29092603f072d1584c9ca056f1133e3cfd Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 09:25:41 -0700 Subject: [PATCH 057/102] possible fix for warnings on armv6 (issue #719) --- include/mimalloc/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 28343d21..bef23d51 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -165,7 +165,7 @@ typedef int32_t mi_ssize_t; // Derived constants #define MI_SEGMENT_SIZE (MI_ZU(1)< Date: Mon, 17 Apr 2023 09:13:08 -0700 Subject: [PATCH 058/102] further fix for atomic build error suggested by Davide Di Gennaro (issue #729, pr #724) --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 1951b470..130ef820 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -281,7 +281,7 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t; static inline bool mi_atomic_once( mi_atomic_once_t* once ) { if (mi_atomic_load_relaxed(once) != 0) return false; // quick test uintptr_t expected = 0; - return mi_atomic_cas_strong_acq_rel(once, &expected, 1UL); // try to set to 1 + return 
mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 } // Yield From f42c66ccceffc2ce54ba2f07cde63b7777341f41 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 10:13:36 -0700 Subject: [PATCH 059/102] fix build on vs2017/2019 --- ide/vs2017/mimalloc-override.vcxproj | 2 +- ide/vs2017/mimalloc-override.vcxproj.filters | 6 +++--- ide/vs2017/mimalloc.vcxproj | 2 +- ide/vs2017/mimalloc.vcxproj.filters | 6 +++--- ide/vs2019/mimalloc-override.vcxproj | 2 +- ide/vs2019/mimalloc-override.vcxproj.filters | 6 +++--- ide/vs2019/mimalloc.vcxproj | 2 +- ide/vs2019/mimalloc.vcxproj.filters | 6 +++--- src/init.c | 3 +++ 9 files changed, 19 insertions(+), 16 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 0d11068b..3d5c1f75 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -239,7 +239,6 @@ - @@ -250,6 +249,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 009962dd..70f84d59 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -70,9 +70,6 @@ Source Files - - Source Files - Source Files @@ -91,5 +88,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 05024448..46eb05d8 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -228,7 +228,6 @@ - true @@ -238,6 +237,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 249757b6..0c2bd522 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -44,9 +44,6 @@ Source Files - - Source Files - Source Files @@ -62,6 +59,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index d80133e7..1c5c61b7 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -239,7 +239,6 @@ - @@ -250,6 +249,7 @@ + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index 357a9a2f..370c8ab3 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -19,9 +19,6 @@ Source Files - - Source Files - Source Files @@ -52,6 +49,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 79146c99..0e2eb312 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -226,7 +226,6 @@ true true - true @@ -236,6 +235,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 9b215312..21f9c517 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -19,9 +19,6 @@ Source Files - - Source Files - Source Files @@ -55,6 +52,9 @@ Source Files + + Source Files + diff --git a/src/init.c b/src/init.c index 6dc61c48..78618627 100644 --- a/src/init.c +++ b/src/init.c @@ -529,6 +529,9 @@ static void mi_detect_cpu_features(void) { void mi_process_init(void) mi_attr_noexcept { // ensure we are called once static mi_atomic_once_t process_init; + #if _MSC_VER < 1920 + mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main + #endif if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); From 
aa1ba045ac51cb64a29127dfcf11d9f8c19c4eaf Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 10:19:40 -0700 Subject: [PATCH 060/102] increase pipeline timeout for tsan --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5054a532..c215ab4d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -121,7 +121,7 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 180 + - script: ctest --verbose --timeout 300 workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) From 6d42f2ac390e70424ffd402449a50c74079fd455 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 11:35:51 -0700 Subject: [PATCH 061/102] increase pipeline timeout to 10min for tsan --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c0f7ec74..1804be26 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -121,7 +121,7 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 300 + - script: ctest --verbose --timeout 600 workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) From 93175c98718584b19dff7f8bd24593c2ba051e83 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 11:36:11 -0700 Subject: [PATCH 062/102] increase pipeline timeout to 10min for tsan --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c215ab4d..c0725e3a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -121,7 +121,7 @@ jobs: cmakeArgs: .. 
$(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 300 + - script: ctest --verbose --timeout 600 workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) From e03f26b0356090c657870f6446f7f8848c99b6ad Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 11:39:55 -0700 Subject: [PATCH 063/102] fix atomic signature --- include/mimalloc/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 857e9f02..61577833 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -290,8 +290,8 @@ typedef _Atomic(uintptr_t) mi_atomic_guard_t; #define mi_atomic_guard(guard) \ uintptr_t _mi_guard_expected = 0; \ for(bool _mi_guard_once = true; \ - _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,1); \ - (mi_atomic_store_release(guard,0), _mi_guard_once = false) ) + _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \ + (mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) ) From 63ddc31d3f20738a9641b32fc99cf2187c35c20f Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 12:01:00 -0700 Subject: [PATCH 064/102] fix 32-bit compile warnings --- include/mimalloc/atomic.h | 25 +++++++++++++++++++++---- src/arena.c | 6 +++--- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 61577833..d0226029 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -113,11 +113,13 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { } // Used by timers -#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) -#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) -#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) -#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d) +#define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i) #elif defined(_MSC_VER) @@ -245,6 +247,21 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } +static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) { + mi_atomic_addi64_relaxed(p, i); +} + +static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) { + int64_t read = _InterlockedCompareExchange64(p, des, *exp); + if (read == *exp) { + return true; + } + else { + *exp = read; + return false; + } +} + // The pointer macros cast to `uintptr_t`. 
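Illustrative aside (standalone C11, not part of the patch): the MSVC fallback added above emulates the usual strong compare-and-swap contract, i.e. return true on success and, on failure, write the observed value back into `*expected` so the caller can retry. A minimal sketch of that contract with <stdatomic.h>; the function and variable names here are invented for the example.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// strong CAS on a 64-bit value: true on success; on failure *expected is
// updated to the value that was actually observed (so a retry loop can continue)
static bool casi64_strong(_Atomic(int64_t)* p, int64_t* expected, int64_t desired) {
  return atomic_compare_exchange_strong(p, expected, desired);
}

int main(void) {
  _Atomic(int64_t) v = 10;
  int64_t expected = 10;
  bool ok = casi64_strong(&v, &expected, 20);        // succeeds: v becomes 20
  printf("first cas: %d, v = %lld\n", ok, (long long)atomic_load(&v));
  expected = 10;                                     // stale expectation
  ok = casi64_strong(&v, &expected, 30);             // fails: expected becomes 20
  printf("second cas: %d, expected = %lld\n", ok, (long long)expected);
  return 0;
}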
#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) #define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) diff --git a/src/arena.c b/src/arena.c index f9b844bc..d3ffe3b7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -506,7 +506,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t // schedule decommit mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); if (expire != 0) { - mi_atomic_add_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay + mi_atomic_addi64_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay } else { mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); @@ -547,7 +547,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi if (!force && expire > now) return false; // reset expire (if not already set concurrently) - mi_atomic_cas_strong_acq_rel(&arena->purge_expire, &expire, 0); + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0); // potential purges scheduled, walk through the bitmap bool any_purged = false; @@ -589,7 +589,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi if (!full_purge) { const long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); mi_msecs_t expected = 0; - mi_atomic_cas_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay); + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay); } return any_purged; } From 72f3ba95a86b766d5aced3704fc8012b32cf87d9 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 17 Apr 2023 12:08:22 -0700 Subject: [PATCH 065/102] reduce iterations under tsan --- test/test-stress.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test-stress.c b/test/test-stress.c index c6236b77..c0de8910 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -26,7 +26,12 @@ terms of the MIT license. 
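Illustrative aside (standalone C11, not part of the patch): the `purge_expire` handling in the arena diff above follows a delayed-purge pattern: set an expiration time on the first request, push it out a little on repeated requests, and only purge once the deadline passes (or when forced). A rough sketch with hypothetical names.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef int64_t msecs_t;

// schedule a purge: set the deadline on the first request, nudge it forward on repeats
static void schedule_purge(_Atomic(msecs_t)* purge_expire, msecs_t now, msecs_t delay) {
  msecs_t expire = atomic_load_explicit(purge_expire, memory_order_relaxed);
  if (expire != 0) {
    atomic_fetch_add_explicit(purge_expire, delay/10, memory_order_acq_rel);  // small extra delay
  }
  else {
    atomic_store_explicit(purge_expire, now + delay, memory_order_release);
  }
}

// decide whether to purge now; on purge, reset the deadline (unless it changed concurrently)
static bool should_purge(_Atomic(msecs_t)* purge_expire, msecs_t now, bool force) {
  msecs_t expire = atomic_load_explicit(purge_expire, memory_order_relaxed);
  if (expire == 0) return false;             // nothing scheduled
  if (!force && expire > now) return false;  // deadline not reached yet
  atomic_compare_exchange_strong(purge_expire, &expire, (msecs_t)0);
  return true;
}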
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 25; // scaling factor + +#if defined(MI_TSAN) +static int ITER = 10; // N full iterations destructing and re-creating all threads (on tsan reduce for azure pipeline limits) +#else static int ITER = 50; // N full iterations destructing and re-creating all threads +#endif // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor From 4a867ffd98b7bcf47ac395a0edf25ddd3acfc651 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 12:24:01 -0700 Subject: [PATCH 066/102] disable tsan as it times out on the azure pipeline --- azure-pipelines.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c0725e3a..27dfa3e1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -108,11 +108,12 @@ jobs: CXX: clang++ BuildType: debug-ubsan-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_DEBUG_UBSAN=ON - Debug TSAN Clang++: - CC: clang - CXX: clang++ - BuildType: debug-tsan-clang-cxx - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON + # Disable for now as it times out on the azure build machines + # Debug TSAN Clang++: + # CC: clang + # CXX: clang++ + # BuildType: debug-tsan-clang-cxx + # cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON steps: - task: CMake@1 @@ -121,7 +122,7 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 600 + - script: ctest --verbose --timeout 180 workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) From 015aac05a5938ec82a45e5fe101de2adb6f7f6a8 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 12:24:51 -0700 Subject: [PATCH 067/102] keep tsan enabled for dev-slice --- azure-pipelines.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cb9b08fe..0247c76f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -108,12 +108,11 @@ jobs: CXX: clang++ BuildType: debug-ubsan-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_DEBUG_UBSAN=ON - # Disable for now as it times out on the azure build machines - # Debug TSAN Clang++: - # CC: clang - # CXX: clang++ - # BuildType: debug-tsan-clang-cxx - # cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON + Debug TSAN Clang++: + CC: clang + CXX: clang++ + BuildType: debug-tsan-clang-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON steps: - task: CMake@1 From 91cd6e2560b9bf7684690b7364b52ff60a62d750 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 13:01:08 -0700 Subject: [PATCH 068/102] add filter files for vs2022 --- ide/vs2022/mimalloc-override.vcxproj.filters | 107 +++++++++++++++++++ ide/vs2022/mimalloc.vcxproj.filters | 99 +++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 ide/vs2022/mimalloc-override.vcxproj.filters create mode 100644 ide/vs2022/mimalloc.vcxproj.filters diff --git a/ide/vs2022/mimalloc-override.vcxproj.filters b/ide/vs2022/mimalloc-override.vcxproj.filters new file mode 100644 index 00000000..0f105c1a --- /dev/null +++ b/ide/vs2022/mimalloc-override.vcxproj.filters @@ -0,0 +1,107 @@ + + + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + 
Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + + + {9ef1cf48-7bb2-4af1-8cc1-603486e08a7a} + + + {cfcf1674-81e3-487a-a8dd-5f956ae4007d} + + + + + Headers + + + \ No newline at end of file diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters new file mode 100644 index 00000000..28ba20b1 --- /dev/null +++ b/ide/vs2022/mimalloc.vcxproj.filters @@ -0,0 +1,99 @@ + + + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + + + {dd2da697-c33c-4348-bf80-a802ebaa06fb} + + + {8027057b-4b93-4321-b93c-d51dd0c8077b} + + + \ No newline at end of file From 10a6b303bebc53702de53fb36f5c786522d71450 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 13:34:23 -0700 Subject: [PATCH 069/102] review bitmap.c --- src/bitmap.c | 89 +++++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/src/bitmap.c b/src/bitmap.c index bdd1c869..98b53052 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -11,7 +11,6 @@ represeted as an array of fields where each field is a machine word (`size_t`) There are two api's; the standard one cannot have sequences that cross between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). -(this is used in region allocation) The `_across` postfixed functions do allow sequences that can cross over between the fields. (This is used in arena allocation) @@ -63,12 +62,12 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ // scan linearly for a free range of zero bits while (bitidx <= bitidx_max) { - const size_t mapm = map & m; + const size_t mapm = (map & m); if (mapm == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? - const size_t newmap = map | m; + const size_t newmap = (map | m); mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here? // no success, another thread claimed concurrently.. keep going (with updated `map`) continue; } @@ -81,7 +80,8 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ else { // on to the next bit range #ifdef MI_HAVE_FAST_BITSCAN - const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1); + mi_assert_internal(mapm != 0); + const size_t shift = (count == 1 ? 
1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx)); mi_assert_internal(shift > 0 && shift <= count); #else const size_t shift = 1; @@ -100,7 +100,7 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap + if (idx >= bitmap_fields) { idx = 0; } // wrap if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } @@ -108,13 +108,6 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel return false; } -/* -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { - return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); -} -*/ // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. @@ -124,7 +117,7 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); - size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); + const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); } @@ -138,7 +131,7 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); - if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); } return ((prev & mask) == 0); } @@ -148,8 +141,8 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - size_t field = mi_atomic_load_relaxed(&bitmap[idx]); - if (any_ones != NULL) *any_ones = ((field & mask) != 0); + const size_t field = mi_atomic_load_relaxed(&bitmap[idx]); + if (any_ones != NULL) { *any_ones = ((field & mask) != 0); } return ((field & mask) == mask); } @@ -160,10 +153,13 @@ bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - size_t expected = 0; - if (mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, mask)) return true; - if ((expected & mask) != 0) return false; - return mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask); + size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); + do { + if ((expected & mask) != 0) return false; + } + while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); + mi_assert_internal((expected & mask) == 0); + return true; } @@ -185,6 +181,7 @@ bool 
_mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Try to atomically claim a sequence of `count` bits starting from the field // at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. +// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`) static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); @@ -195,9 +192,9 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit const size_t initial = mi_clz(map); // count of initial zeros starting at idx mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; - if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields + if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries - + // scan ahead size_t found = initial; size_t mask = 0; // mask bits for the final field @@ -205,25 +202,27 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit field++; map = mi_atomic_load_relaxed(field); const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); + mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS); mask = mi_bitmap_mask_(mask_bits, 0); - if ((map & mask) != 0) return false; + if ((map & mask) != 0) return false; // some part is already claimed found += mask_bits; } mi_assert_internal(field < &bitmap[bitmap_fields]); - // found range of zeros up to the final field; mask contains mask in the final field - // now claim it atomically + // we found a range of contiguous zeros up to the final field; mask contains mask in the final field + // now try to claim the range atomically mi_bitmap_field_t* const final_field = field; const size_t final_mask = mask; mi_bitmap_field_t* const initial_field = &bitmap[idx]; - const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); + const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial; + const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx); // initial field size_t newmap; field = initial_field; map = mi_atomic_load_relaxed(field); do { - newmap = map | initial_mask; + newmap = (map | initial_mask); if ((map & initial_mask) != 0) { goto rollback; }; } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); @@ -238,31 +237,32 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit mi_assert_internal(field == final_field); map = mi_atomic_load_relaxed(field); do { - newmap = map | final_mask; + newmap = (map | final_mask); if ((map & final_mask) != 0) { goto rollback; } } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); // claimed! 
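Illustrative aside (standalone C11, not part of the patch): the single-field claim reviewed above boils down to "build a mask of `count` bits at `bitidx`, and CAS it in only if all those bits are still zero". A minimal sketch of that loop; the helper names are invented for the example.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

#define FIELD_BITS  (8*sizeof(size_t))

// mask of `count` one-bits starting at `bitidx` (requires count + bitidx <= FIELD_BITS)
static size_t bitmap_mask(size_t count, size_t bitidx) {
  if (count == FIELD_BITS) return (size_t)-1;
  return (((size_t)1 << count) - 1) << bitidx;
}

// atomically set `count` bits at `bitidx` from 0 to 1; fail if any of them is already set
static bool try_claim(_Atomic(size_t)* field, size_t count, size_t bitidx) {
  const size_t mask = bitmap_mask(count, bitidx);
  size_t expected = atomic_load_explicit(field, memory_order_relaxed);
  do {
    if ((expected & mask) != 0) return false;   // some bit in the range is already claimed
  } while (!atomic_compare_exchange_strong(field, &expected, expected | mask));
  return true;
}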
- *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); + *bitmap_idx = mi_bitmap_index_create(idx, initial_idx); return true; rollback: // roll back intermediate fields + // (we just failed to claim `field` so decrement first) while (--field > initial_field) { newmap = 0; map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); mi_assert_internal(mi_atomic_load_relaxed(field) == map); mi_atomic_store_release(field, newmap); } - if (field == initial_field) { + if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`) map = mi_atomic_load_relaxed(field); do { mi_assert_internal((map & initial_mask) == initial_mask); - newmap = map & ~initial_mask; + newmap = (map & ~initial_mask); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); } // retry? (we make a recursive call instead of goto to be able to use const declarations) - if (retries < 4) { + if (retries <= 2) { return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); } else { @@ -275,17 +275,22 @@ rollback: // Starts at idx, and wraps around to search in all `bitmap_fields` fields. bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(count > 0); - if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + if (count <= 2) { + // we don't bother with crossover fields for small counts + return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + } + + // visit the fields size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - // try to claim inside the field + if (idx >= bitmap_fields) { idx = 0; } // wrap + // first try to claim inside a field if (count <= MI_BITMAP_FIELD_BITS) { if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } - // try to claim across fields + // if that fails, then try to claim across fields if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { return true; } @@ -328,14 +333,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_one = true; mi_bitmap_field_t* field = &bitmap[idx]; - size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); + size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part if ((prev & pre_mask) != pre_mask) all_one = false; while(mid_count-- > 0) { - prev = mi_atomic_and_acq_rel(field++, ~mid_mask); + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part if ((prev & mid_mask) != mid_mask) all_one = false; } if (post_mask!=0) { - prev = mi_atomic_and_acq_rel(field, ~post_mask); + prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part if ((prev & post_mask) != post_mask) all_one = false; } return all_one; @@ -365,7 +370,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co if ((prev & post_mask) != 0) all_zero = false; if ((prev & post_mask) != post_mask) any_zero = true; } - if (pany_zero != NULL) *pany_zero = any_zero; + if (pany_zero != NULL) { *pany_zero = any_zero; } return all_zero; } @@ -394,7 +399,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, 
size_t bitmap_field if ((prev & post_mask) != post_mask) all_ones = false; if ((prev & post_mask) != 0) any_ones = true; } - if (pany_ones != NULL) *pany_ones = any_ones; + if (pany_ones != NULL) { *pany_ones = any_ones; } return all_ones; } From 2f878354f401baf6ac4c4d6c5a0bb53d3356590b Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 14:13:55 -0700 Subject: [PATCH 070/102] add arena contains check for valid pointers --- src/bitmap.c | 2 +- src/segment-map.c | 2 +- test/test-stress.c | 9 ++++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/bitmap.c b/src/bitmap.c index 98b53052..01064140 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. diff --git a/src/segment-map.c b/src/segment-map.c index 56b18531..4c2104bd 100644 --- a/src/segment-map.c +++ b/src/segment-map.c @@ -125,7 +125,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { // Is this a valid pointer in our heap? static bool mi_is_valid_pointer(const void* p) { - return (_mi_segment_of(p) != NULL); + return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p))); } mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { diff --git a/test/test-stress.c b/test/test-stress.c index c0de8910..0d6d0bc9 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -38,8 +38,8 @@ static int ITER = 50; // N full iterations destructing and re-creating a #define STRESS // undefine for leak test -static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? +static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100) +static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? // #define USE_STD_MALLOC @@ -244,7 +244,10 @@ int main(int argc, char** argv) { long n = (strtol(argv[3], &end, 10)); if (n > 0) ITER = n; } - printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + if (SCALE > 100) { + allow_large_objects = true; + } + printf("Using %d threads with a %d%% load-per-thread and %d iterations %s\n", THREADS, SCALE, ITER, (allow_large_objects ? 
"(allow large objects)" : "")); //mi_reserve_os_memory(1024*1024*1024ULL, false, true); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); From b845be241aec2cb3e3dfd2e9fb45bbb5233ea459 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 16:38:47 -0700 Subject: [PATCH 071/102] wip: use memid for OS allocation --- include/mimalloc/internal.h | 48 ++++++++--- include/mimalloc/types.h | 9 +- src/arena.c | 168 ++++++++++++------------------------ src/init.c | 14 ++- src/os.c | 115 ++++++++++++++---------- 5 files changed, 178 insertions(+), 176 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e0bcfaea..b320f690 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -88,9 +88,9 @@ void _mi_thread_data_collect(void); // os.c void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats); -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool is_committed, mi_stats_t* stats); +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); @@ -105,16 +105,14 @@ bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats); -void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, bool* is_zero); -void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); // arena.c mi_arena_id_t _mi_arena_id_none(void); @@ -268,6 +266,10 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) +#include +// initialize a local variable to zero; use memset as compilers optimize constant sized memset's +#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) + // Is `x` a power of two? 
(0 is considered a power of two) static inline bool _mi_is_power_of_two(uintptr_t x) { return ((x & (x - 1)) == 0); @@ -653,6 +655,30 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } + +/* ----------------------------------------------------------- + memory id's +----------------------------------------------------------- */ + +static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { + mi_memid_t memid; + _mi_memzero_var(memid); + memid.memkind = memkind; + return memid; +} + +static inline mi_memid_t _mi_memid_none(void) { + return _mi_memid_create(MI_MEM_NONE); +} + +static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero) { + mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.was_committed = committed; + memid.was_zero = is_zero; + return memid; +} + + // ------------------------------------------------------------------- // Fast "random" shuffle // ------------------------------------------------------------------- @@ -812,7 +838,6 @@ static inline size_t mi_bsr(uintptr_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -#include extern bool _mi_cpu_has_fsrm; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if (_mi_cpu_has_fsrm) { @@ -831,7 +856,6 @@ static inline void _mi_memzero(void* dst, size_t n) { } } #else -#include static inline void _mi_memcpy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); } @@ -840,9 +864,6 @@ static inline void _mi_memzero(void* dst, size_t n) { } #endif -// initialize a local variable to zero; use memset as compilers optimize constant sized memset's -#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) - // ------------------------------------------------------------------------------- // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned // This is used for example in `mi_realloc`. @@ -850,7 +871,6 @@ static inline void _mi_memzero(void* dst, size_t n) { #if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // On GCC/CLang we provide a hint that the pointers are word aligned. -#include static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 85d7e813..60408ed3 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -330,12 +330,17 @@ typedef enum mi_memkind_e { MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) MI_MEM_OS, // allocated from the OS + MI_MEM_OS_HUGE, // allocated as huge os pages + MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. 
using `mremap`) MI_MEM_ARENA // allocated from an arena (the usual case) } mi_memkind_t; +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + typedef struct mi_memid_os_info { - size_t alignment; // allocated with the given alignment - size_t align_offset; // the offset that was aligned (used only for huge aligned pages) + void* base; // actual base address of the block (used for offset aligned allocations) } mi_memid_os_info_t; typedef struct mi_memid_arena_info { diff --git a/src/arena.c b/src/arena.c index d3ffe3b7..04d5c49c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -41,18 +41,15 @@ typedef uintptr_t mi_block_info_t; // A memory arena descriptor typedef struct mi_arena_s { mi_arena_id_t id; // arena id; 0 for non-specific - bool exclusive; // only allow allocations if specifically for this arena - bool owned; // if true, the arena will be released when the process exits if `mi_option_destroy_on_exit` is set. + mi_memid_t memid; // memid of the memory area _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) size_t meta_size; // size of the arena structure itself (including its bitmaps) mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node - bool is_zero_init; // is the arena zero initialized? - bool is_large; // large- or huge OS pages (always committed) - bool is_huge_alloc; // huge OS pages allocated by `_mi_os_alloc_huge_pages` - bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL + bool exclusive; // only allow allocations if specifically for this arena + bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? 
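Illustrative aside (not part of the patch; assumes the in-tree `mimalloc/internal.h`): the memid rework above means every allocation hands back a `mi_memid_t` that records how the memory was obtained, and the same memid is passed to the free routine so it can dispatch to the right release path (plain OS, huge OS pages, arena, or static). A hypothetical caller-side round trip:

#include "mimalloc/internal.h"

static void memid_roundtrip(mi_stats_t* stats) {
  mi_memid_t memid;
  void* p = _mi_os_alloc(4096, &memid, stats);   // fills in memid: MI_MEM_OS, was_zero, is_pinned, ...
  if (p == NULL) return;
  // memid.was_zero  : did the OS guarantee zero-initialized memory?
  // memid.is_pinned : may this memory never be decommitted (e.g. large/huge OS pages)?
  _mi_os_free(p, 4096, memid, stats);            // memid selects the matching free path
}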
@@ -67,7 +64,7 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 -static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept; +//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept; /* ----------------------------------------------------------- Arena id's @@ -92,27 +89,6 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus (arena_id == req_arena_id)); } -/* ----------------------------------------------------------- - memory id's ------------------------------------------------------------ */ - -static mi_memid_t mi_memid_create(mi_memkind_t memkind) { - mi_memid_t memid; - _mi_memzero_var(memid); - memid.memkind = memkind; - return memid; -} - -static mi_memid_t mi_memid_none(void) { - return mi_memid_create(MI_MEM_NONE); -} - -static mi_memid_t mi_memid_create_os(bool committed) { - mi_memid_t memid = mi_memid_create(MI_MEM_OS); - memid.was_committed = committed; - return memid; -} - bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { if (memid.memkind == MI_MEM_ARENA) { return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); @@ -123,7 +99,6 @@ bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_i } - /* ----------------------------------------------------------- Arena allocations get a (currently) 16-bit memory id where the lower 8 bits are the arena id, and the upper bits the block index. 
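Illustrative aside (standalone C, not part of the patch): the comment above describes the packed arena memory id, with the arena id in the lower 8 bits and the block index in the bits above it. A small sketch of that encoding with invented helper names; note that after this patch the same information is carried in the `mi_memid_t` struct fields instead of a packed word.

#include <assert.h>
#include <stddef.h>

static size_t memid_pack(size_t arena_id, size_t block_index) {
  assert(arena_id < 256);                  // arena id must fit in the low 8 bits
  return (block_index << 8) | arena_id;
}
static size_t memid_arena_id(size_t memid)    { return (memid & 0xFF); }
static size_t memid_block_index(size_t memid) { return (memid >> 8); }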
@@ -142,7 +117,7 @@ static size_t mi_arena_size(mi_arena_t* arena) { } static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { - mi_memid_t memid = mi_memid_create(MI_MEM_ARENA); + mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA); memid.mem.arena.id = id; memid.mem.arena.block_index = bitmap_index; memid.mem.arena.is_exclusive = is_exclusive; @@ -169,7 +144,7 @@ static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; static _Atomic(size_t) mi_arena_static_top; static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { - *memid = mi_memid_none(); + *memid = _mi_memid_none(); if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; if (mi_atomic_load_relaxed(&mi_arena_static_top) >= MI_ARENA_STATIC_MAX) return NULL; @@ -186,7 +161,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m } // success - *memid = mi_memid_create(MI_MEM_STATIC); + *memid = _mi_memid_create(MI_MEM_STATIC); const size_t start = _mi_align_up(oldtop, alignment); uint8_t* const p = &mi_arena_static[start]; _mi_memzero(p, size); @@ -194,27 +169,22 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m } static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { - *memid = mi_memid_none(); + *memid = _mi_memid_none(); // try static void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid); if (p != NULL) return p; // or fall back to the OS - bool is_zero = false; - p = _mi_os_alloc(size, &is_zero, stats); - if (p != NULL) { - *memid = mi_memid_create_os(true); - if (!is_zero) { _mi_memzero_aligned(p, size); } - return p; - } - - return NULL; + return _mi_os_alloc(size, memid, stats); } static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) { - if (memid.memkind == MI_MEM_OS) { - _mi_os_free(p, size, stats); + if (mi_memkind_is_os(memid.memkind)) { + _mi_os_free(p, size, memid, stats); + } + else { + mi_assert(memid.memkind == MI_MEM_STATIC); } } @@ -252,7 +222,7 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_ // claimed it! 
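Illustrative aside (standalone C, not part of the patch): once a run of bitmap bits is claimed, the block address follows directly from the bit index, as in the `arena->start + mi_arena_block_size(...)` line just below. A trivial sketch of that mapping; the block size constant here is an arbitrary stand-in, not the real `MI_ARENA_BLOCK_SIZE`.

#include <stddef.h>
#include <stdint.h>

#define ARENA_BLOCK_SIZE  ((size_t)(32*1024*1024))   // stand-in value for the example only

// map a claimed bitmap bit index to the start address of its block in the arena
static void* arena_block_start(uint8_t* arena_start, size_t bit_index) {
  return arena_start + (bit_index * ARENA_BLOCK_SIZE);
}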
void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); - memid->is_pinned = (arena->is_large || !arena->allow_decommit); + memid->is_pinned = arena->memid.is_pinned; // none of the claimed blocks should be scheduled for a decommit if (arena->blocks_purge != NULL) { @@ -404,7 +374,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = mi_memid_none(); + *memid = _mi_memid_none(); const int numa_node = _mi_os_numa_node(tld); // current numa node @@ -429,17 +399,12 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset } // finally, fall back to the OS - bool os_is_large = false; - bool os_is_zero = false; - void* p = _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, &os_is_large, &os_is_zero, tld->stats); - if (p != NULL) { - *memid = mi_memid_create_os(commit); - memid->is_pinned = os_is_large; - memid->was_zero = os_is_zero; - memid->mem.os.alignment = alignment; - memid->mem.os.align_offset = align_offset; + if (align_offset > 0) { + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); + } + else { + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); } - return p; } void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) @@ -467,7 +432,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); - mi_assert_internal(arena->allow_decommit); + mi_assert_internal(!arena->memid.is_pinned); const size_t size = mi_arena_block_size(blocks); void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx)); bool needs_recommit; @@ -541,7 +506,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, // returns true if anything was purged static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) { - if (!arena->allow_decommit || arena->blocks_purge == NULL) return false; + if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false; mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); if (expire == 0) return false; if (!force && expire > now) return false; @@ -631,18 +596,13 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (size==0) return; const bool all_committed = (committed_size == size); - if (memid.memkind == MI_MEM_OS) { + if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through if (!all_committed && committed_size > 0) { // if partially committed, adjust the committed stats _mi_stat_decrease(&stats->committed, committed_size); } - if (memid.mem.os.align_offset != 0) { - _mi_os_free_aligned_at_offset(p, size, memid.mem.os.alignment, memid.mem.os.align_offset, all_committed, stats); - } - else { - _mi_os_free(p, size, stats); - } + _mi_os_free(p, size, memid, stats); } else if (memid.memkind == MI_MEM_ARENA) { // allocated in an arena @@ -669,7 +629,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_track_mem_undefined(p,size); // 
potentially decommit - if (!arena->allow_decommit || arena->blocks_committed == NULL) { + if (arena->memid.is_pinned || arena->blocks_committed == NULL) { mi_assert_internal(all_committed); } else { @@ -717,14 +677,9 @@ static void mi_arenas_unsafe_destroy(void) { for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL) { - if (arena->owned && arena->start != NULL) { + if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) { mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); - if (arena->is_huge_alloc) { - _mi_os_free_huge_os_pages(arena->start, mi_arena_size(arena), &_mi_stats_main); - } - else { - _mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main); - } + _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main); } else { new_max_arena = i; @@ -784,21 +739,18 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { return true; } -static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); if (size < MI_ARENA_BLOCK_SIZE) return false; if (is_large) { - mi_assert_internal(is_committed); - is_committed = true; + mi_assert_internal(memid.was_committed && memid.is_pinned); } - const bool allow_decommit = !is_large; // && !is_committed; // only allow decommit for initially uncommitted memory - const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); - const size_t bitmaps = (allow_decommit ? 4 : 2); + const size_t bitmaps = (memid.is_pinned ? 2 : 4); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); mi_memid_t meta_memid; mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? @@ -807,8 +759,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, // already zero'd due to os_alloc // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); + arena->memid = memid; arena->exclusive = exclusive; - arena->owned = owned; arena->meta_size = asize; arena->meta_memid = meta_memid; arena->block_count = bcount; @@ -816,16 +768,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, arena->start = (uint8_t*)start; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = is_large; - arena->is_huge_alloc= is_huge_alloc; - arena->is_zero_init = is_zero; - arena->allow_decommit = allow_decommit; arena->purge_expire = 0; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap - arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - arena->blocks_purge = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap + arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap + arena->blocks_purge = (arena->memid.is_pinned ? 
NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap // initialize committed bitmap? - if (arena->blocks_committed != NULL && is_committed) { + if (arena->blocks_committed != NULL && arena->memid.was_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning } @@ -842,31 +791,28 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, } bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - return mi_manage_os_memory_ex2(start,size,is_committed,is_large,false,is_zero,numa_node,exclusive,false /* not owned */, arena_id); -} - - -// Reserve a range of regular OS memory -static int mi_reserve_os_memory_ex2(size_t size, bool commit, bool allow_large, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept -{ - if (arena_id != NULL) *arena_id = _mi_arena_id_none(); - size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block - bool is_large = false; - bool is_zero = false; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &is_large, &is_zero, &_mi_stats_main); - if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory_ex2(start, size, (is_large || commit), is_large, false, is_zero, -1, exclusive, owned, arena_id)) { - _mi_os_free_ex(start, size, commit, &_mi_stats_main); - _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); - return ENOMEM; - } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), is_large ? " (in large os pages)" : ""); - return 0; + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.was_committed = is_committed; + memid.was_zero = is_zero; + memid.is_pinned = is_large; + return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id); } // Reserve a range of regular OS memory int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - return mi_reserve_os_memory_ex2(size,commit,allow_large,exclusive,true /*owned*/, arena_id); + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); + size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block + mi_memid_t memid; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main); + if (start == NULL) return ENOMEM; + const bool is_large = memid.is_pinned; // todo: use separate is_large field? + if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); + _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024)); + return ENOMEM; + } + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? 
" (in large os pages)" : ""); + return 0; } @@ -925,16 +871,16 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - bool is_zero = false; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &is_zero); + mi_memid_t memid; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); return ENOMEM; } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex2(p, hsize, true, true, true, is_zero, numa_node, exclusive, true /* owned */, arena_id)) { - _mi_os_free_huge_os_pages(p, hsize, &_mi_stats_main); + if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + _mi_os_free(p, hsize, memid, &_mi_stats_main); return ENOMEM; } return 0; diff --git a/src/init.c b/src/init.c index 78618627..cd2fd643 100644 --- a/src/init.c +++ b/src/init.c @@ -177,6 +177,7 @@ mi_heap_t* _mi_heap_main_get(void) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_memid_t memid; } mi_thread_data_t; @@ -205,15 +206,20 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { // if that fails, allocate as meta data if (td == NULL) { - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main); + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { // if this fails, try once more. 
(issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main); + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { // really out of memory _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); } } + if (td != NULL) { + td->memid = memid; + is_zero = memid.was_zero; + } } if (td != NULL && !is_zero) { @@ -234,7 +240,7 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) { } } // if that fails, just free it directly - _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); + _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); } void _mi_thread_data_collect(void) { @@ -244,7 +250,7 @@ void _mi_thread_data_collect(void) { if (td != NULL) { td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); + _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main); } } } diff --git a/src/os.c b/src/os.c index cf53472f..589399a4 100644 --- a/src/os.c +++ b/src/os.c @@ -148,7 +148,9 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { Free memory -------------------------------------------------------------- */ -static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); + +static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) @@ -157,18 +159,34 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } mi_stats_t* stats = &_mi_stats_main; - if (was_committed) { _mi_stat_decrease(&stats->committed, size); } + if (still_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); } - -void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { - const size_t csize = _mi_os_good_alloc_size(size); - mi_os_mem_free(addr,csize,was_committed,tld_stats); +void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) { + size_t csize = _mi_os_good_alloc_size(size); + if (memid.memkind == MI_MEM_OS) { + if (memid.mem.os.base != NULL) { + mi_assert(memid.mem.os.base <= addr); + csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); + mi_os_prim_free(memid.mem.os.base, csize, still_committed, tld_stats); + } + else { + mi_os_prim_free(addr, csize, still_committed, tld_stats); + } + } + else if (memid.memkind == MI_MEM_OS_HUGE) { + mi_assert(memid.is_pinned); + mi_os_free_huge_os_pages(addr, size, tld_stats); + } + else { + // nothing to do + mi_assert(memid.memkind <= MI_MEM_EXTERNAL); + } } -void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { - _mi_os_free_ex(p, size, true, tld_stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) { + _mi_os_free_ex(p, size, true, memid, tld_stats); } @@ -177,7 +195,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { -------------------------------------------------------------- */ // Note: the `try_alignment` is 
just a hint and the returned pointer is not guaranteed to be aligned. -static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { +static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_zero != NULL); mi_assert_internal(is_large != NULL); @@ -202,7 +220,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. -static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { +static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_large != NULL); @@ -212,19 +230,19 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); + void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); - mi_os_mem_free(p, size, commit, stats); + mi_os_prim_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block // over-allocate uncommitted (virtual) memory - p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); + p = mi_os_prim_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); if (p == NULL) return NULL; // set p to the aligned part in the full region @@ -239,7 +257,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, } else { // mmap can free inside an allocation // overallocate... - p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, is_zero, stats); + p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); if (p == NULL) return NULL; // and selectively unmap parts around the over-allocated area. 
(noop on sbrk) void* aligned_p = mi_align_up_ptr(p, alignment); @@ -247,8 +265,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + if (pre_size > 0) mi_os_prim_free(p, pre_size, commit, stats); + if (post_size > 0) mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; } @@ -263,31 +281,38 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, OS API: alloc and alloc_aligned ----------------------------------------------------------- */ -void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* tld_stats) { +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); + *memid = _mi_memid_none(); mi_stats_t* stats = &_mi_stats_main; if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); - bool is_large = false; - bool is_zerox = false; - void* p = mi_os_mem_alloc(size, 0, true, false, &is_large, &is_zerox, stats); - if (is_zero != NULL) { *is_zero = is_zerox; } + bool os_is_large = false; + bool os_is_zero = false; + void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); + if (p != NULL) { + *memid = _mi_memid_create_os(true, os_is_zero); + memid->is_pinned = os_is_large; + } return p; } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); + *memid = _mi_memid_none(); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); bool os_is_large = false; bool os_is_zero = false; - void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ ); - if (is_large != NULL) { *is_large = os_is_large; } - if (is_zero != NULL) { *is_zero = os_is_zero; } + void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ ); + if (p != NULL) { + *memid = _mi_memid_create_os(commit, os_is_zero); + memid->is_pinned = os_is_large; + } return p; } @@ -299,22 +324,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo to use the actual start of the memory region. 
----------------------------------------------------------- */ -void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) { +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); + *memid = _mi_memid_none(); if (offset > MI_SEGMENT_SIZE) return NULL; if (offset == 0) { // regular aligned allocation - return _mi_os_alloc_aligned(size, alignment, commit, allow_large, is_large, is_zero, tld_stats); + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats); } else { // overallocate to align at an offset const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t oversize = size + extra; - void* start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, is_large, is_zero, tld_stats); + void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats); if (start == NULL) return NULL; - void* p = (uint8_t*)start + extra; + memid->mem.os.base = start; + void* const p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); // decommit the overallocation at the start if (commit && extra > _mi_os_page_size()) { @@ -324,14 +351,6 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse } } -void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { - mi_assert(align_offset <= MI_SEGMENT_SIZE); - const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; - void* start = (uint8_t*)p - extra; - _mi_os_free_ex(start, size + extra, was_committed, tld_stats); -} - - /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. 
----------------------------------------------------------- */ @@ -535,7 +554,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { #endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, bool* is_zero) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) { + *memid = _mi_memid_none(); if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; @@ -550,11 +570,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse bool all_zero = true; while (page < pages) { // allocate a page - bool is_zerox = false; + bool is_zero = false; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = NULL; - int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zerox, &p); - if (!is_zerox) { all_zero = false; } + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p); + if (!is_zero) { all_zero = false; } if (err != 0) { _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); break; @@ -565,7 +585,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // no success, issue a warning and break if (p != NULL) { _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main); } break; } @@ -593,17 +613,22 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) { *pages_reserved = page; } if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } - if (is_zero != NULL) { *is_zero = all_zero; } + if (page != 0) { + mi_assert(start != NULL); + *memid = _mi_memid_create_os(true, all_zero); + memid->memkind = MI_MEM_OS_HUGE; + memid->is_pinned = true; + } return (page == 0 ? NULL : start); } // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. -void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats); size -= MI_HUGE_OS_PAGE_SIZE; base += MI_HUGE_OS_PAGE_SIZE; } From 4d976270ebc18a7968659dfe31f0435652bde4cd Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 17:18:39 -0700 Subject: [PATCH 072/102] review is_zero flag --- include/mimalloc/internal.h | 2 +- include/mimalloc/types.h | 2 +- src/alloc.c | 11 ++++++++--- src/page.c | 13 ++++--------- src/prim/unix/prim.c | 2 +- test/test-stress.c | 2 ++ 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b320f690..2140cadd 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -300,7 +300,7 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { } // Is memory zero initialized? 
-static inline bool mi_mem_is_zero(void* p, size_t size) { +static inline bool mi_mem_is_zero(const void* p, size_t size) { for (size_t i = 0; i < size; i++) { if (((uint8_t*)p)[i] != 0) return false; } diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 60408ed3..9851ef5e 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -285,7 +285,7 @@ typedef struct mi_page_s { uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` uint8_t segment_in_use:1; // `true` if the segment allocated this page uint8_t is_committed:1; // `true` if the page virtual memory is committed - uint8_t is_zero_init:1; // `true` if the page was zero initialized + uint8_t is_zero_init:1; // `true` if the page was initially zero initialized // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` diff --git a/src/alloc.c b/src/alloc.c index b17adf45..3edc0b51 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -46,12 +46,17 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // zero the block? note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) - const size_t zsize = (page->is_zero ? sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size); - _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); + if (page->is_zero) { + block->next = 0; + } + else { + mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE); + _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE); + } } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN - if (!page->is_zero && !zero && !mi_page_is_huge(page)) { + if (!zero && !mi_page_is_huge(page)) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } #elif (MI_SECURE!=0) diff --git a/src/page.c b/src/page.c index 3445e504..27c6fd09 100644 --- a/src/page.c +++ b/src/page.c @@ -639,11 +639,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // enable the new free list page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); - - // extension into zero initialized memory preserves the zero'd free list - if (!page->is_zero_init) { - page->is_zero = false; - } mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -667,12 +662,12 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif - #if MI_DEBUG > 0 - page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501 - #else page->is_zero = page->is_zero_init; + #if MI_DEBUG>1 + if (page->is_zero_init) { + mi_mem_is_zero(page_start, page_size); + } #endif - mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index de50f594..4349f578 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -441,7 +441,7 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bo int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); - *is_zero = true; + *is_zero = false; *addr = NULL; return ENOMEM; } 
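
The hunks above and the test-stress change that follows guard the same invariant: memory handed out by the zeroing allocation paths must actually be zero, whether it arrives already zeroed from the OS (tracked via `is_zero_init` / `free_is_zero`) or has to be cleared explicitly. Below is a minimal standalone check of that user-visible guarantee, written only against the public mi_zalloc/mi_free API; it is an illustration added here for clarity and is not part of these patches.

#include <assert.h>
#include <string.h>
#include <mimalloc.h>

static void check_zero(const unsigned char* p, size_t n) {
  for (size_t i = 0; i < n; i++) { assert(p[i] == 0); }
}

int main(void) {
  const size_t n = 1024;
  // fresh allocation: may be satisfied from zero-initialized OS memory
  unsigned char* p = (unsigned char*)mi_zalloc(n);
  assert(p != NULL);
  check_zero(p, n);
  // dirty the block so a later reuse of this free block is not zero by accident
  memset(p, 0xA5, n);
  mi_free(p);
  // possibly reuses the freed block: mi_zalloc must re-zero it
  p = (unsigned char*)mi_zalloc(n);
  assert(p != NULL);
  check_zero(p, n);
  mi_free(p);
  return 0;
}

Tracking zero-initialization per page is what allows the allocator to skip the explicit memset on the fast path (only `block->next` needs clearing when `free_is_zero` holds), which is why the debug checks in these patches re-verify the claim with `mi_mem_is_zero` before relying on it.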
diff --git a/test/test-stress.c b/test/test-stress.c index 0d6d0bc9..0dccec0a 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -20,6 +20,7 @@ terms of the MIT license. #include #include #include +#include // > mimalloc-test-stress [THREADS] [SCALE] [ITER] // @@ -106,6 +107,7 @@ static void* alloc_items(size_t items, random_t r) { uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t)); if (p != NULL) { for (uintptr_t i = 0; i < items; i++) { + assert(p[i] == 0); p[i] = (items - i) ^ cookie; } } From 09e42aea4e46c95e4d9aafa8189e48cbd2748f65 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 17:41:18 -0700 Subject: [PATCH 073/102] clarify is_zero meaing --- include/mimalloc/types.h | 2 +- src/alloc-aligned.c | 2 +- src/alloc.c | 2 +- src/page.c | 15 ++++++--------- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 9851ef5e..6727a87c 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -291,7 +291,7 @@ typedef struct mi_page_s { uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized + uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 65b28748..be62efda 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -222,7 +222,7 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne if (newp != NULL) { if (zero && newsize > size) { const mi_page_t* page = _mi_ptr_page(newp); - if (page->is_zero) { + if (page->free_is_zero) { // already zero initialized mi_assert_expensive(mi_mem_is_zero(newp,newsize)); } diff --git a/src/alloc.c b/src/alloc.c index 3edc0b51..ac167891 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -46,7 +46,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // zero the block? 
note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) - if (page->is_zero) { + if (page->free_is_zero) { block->next = 0; } else { diff --git a/src/page.c b/src/page.c index 27c6fd09..c61677b6 100644 --- a/src/page.c +++ b/src/page.c @@ -84,7 +84,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); #if MI_DEBUG>3 // generally too expensive to check this - if (page->is_zero) { + if (page->free_is_zero) { const size_t ubsize = mi_page_usable_block_size(page); for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); @@ -220,7 +220,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // usual case page->free = page->local_free; page->local_free = NULL; - page->is_zero = false; + page->free_is_zero = false; } else if (force) { // append -- only on shutdown (force) as this is a linear operation @@ -232,7 +232,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_block_set_next(page, tail, page->free); page->free = page->local_free; page->local_free = NULL; - page->is_zero = false; + page->free_is_zero = false; } } @@ -662,12 +662,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif - page->is_zero = page->is_zero_init; - #if MI_DEBUG>1 - if (page->is_zero_init) { - mi_mem_is_zero(page_start, page_size); - } - #endif + page->free_is_zero = page->is_zero_init; + mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page_start, page_size)); + mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); From 173382ef6001d23119bd05353c8a0531b781d16c Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 17:52:16 -0700 Subject: [PATCH 074/102] is_zero review; fix asan check --- src/alloc.c | 3 ++- src/page.c | 7 ++++++- test/test-stress.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index ac167891..da647ca6 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -46,11 +46,12 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // zero the block? 
note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) + mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE); if (page->free_is_zero) { block->next = 0; + mi_track_mem_defined(block, page->xblock_size - MI_PADDING_SIZE); } else { - mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE); _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE); } } diff --git a/src/page.c b/src/page.c index c61677b6..93636b0e 100644 --- a/src/page.c +++ b/src/page.c @@ -663,7 +663,12 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->keys[1] = _mi_heap_random_next(heap); #endif page->free_is_zero = page->is_zero_init; - mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page_start, page_size)); + #if MI_DEBUG>2 + if (page->is_zero_init) { + mi_track_mem_defined(page_start, page_size); + mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page_start, page_size)); + } + #endif mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); diff --git a/test/test-stress.c b/test/test-stress.c index 0dccec0a..3ecb67bd 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -39,7 +39,7 @@ static int ITER = 50; // N full iterations destructing and re-creating a #define STRESS // undefine for leak test -static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100) +static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100) static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? From e6f3647556317f3d7f86d0f222a5c412a52fc748 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 19:00:50 -0700 Subject: [PATCH 075/102] track os alignment and base --- include/mimalloc/internal.h | 3 +- include/mimalloc/types.h | 1 + src/os.c | 65 ++++++++++++++++++++++--------------- 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2140cadd..f3837973 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -671,10 +671,11 @@ static inline mi_memid_t _mi_memid_none(void) { return _mi_memid_create(MI_MEM_NONE); } -static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero) { +static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { mi_memid_t memid = _mi_memid_create(MI_MEM_OS); memid.was_committed = committed; memid.was_zero = is_zero; + memid.is_pinned = is_large; return memid; } diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 6727a87c..cd207abd 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -341,6 +341,7 @@ static inline bool mi_memkind_is_os(mi_memkind_t memkind) { typedef struct mi_memid_os_info { void* base; // actual base address of the block (used for offset aligned allocations) + size_t alignment; // alignment at allocation } mi_memid_os_info_t; typedef struct mi_memid_arena_info { diff --git a/src/os.c b/src/os.c index 589399a4..f243f7a4 100644 --- a/src/os.c +++ b/src/os.c @@ -164,24 +164,28 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st } void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) { - size_t csize = _mi_os_good_alloc_size(size); - if (memid.memkind == MI_MEM_OS) { + if (mi_memkind_is_os(memid.memkind)) 
{ + size_t csize = _mi_os_good_alloc_size(size); + void* base = addr; + // different base? (due to alignment) if (memid.mem.os.base != NULL) { mi_assert(memid.mem.os.base <= addr); + mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr); + base = memid.mem.os.base; csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); - mi_os_prim_free(memid.mem.os.base, csize, still_committed, tld_stats); + } + // free it + if (memid.memkind == MI_MEM_OS_HUGE) { + mi_assert(memid.is_pinned); + mi_os_free_huge_os_pages(base, csize, tld_stats); } else { - mi_os_prim_free(addr, csize, still_committed, tld_stats); + mi_os_prim_free(base, csize, still_committed, tld_stats); } } - else if (memid.memkind == MI_MEM_OS_HUGE) { - mi_assert(memid.is_pinned); - mi_os_free_huge_os_pages(addr, size, tld_stats); - } else { // nothing to do - mi_assert(memid.memkind <= MI_MEM_EXTERNAL); + mi_assert(memid.memkind < MI_MEM_OS); } } @@ -220,11 +224,12 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. -static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { +static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_large != NULL); mi_assert_internal(is_zero != NULL); + mi_assert_internal(base != NULL); if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); @@ -233,8 +238,12 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; - // if not aligned, free it, overallocate, and unmap around it - if (((uintptr_t)p % alignment != 0)) { + // aligned already? + if (((uintptr_t)p % alignment) == 0) { + *base = p; + } + else { + // if not aligned, free it, overallocate, and unmap around it _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); mi_os_prim_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow @@ -242,12 +251,13 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block // over-allocate uncommitted (virtual) memory - p = mi_os_prim_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); + p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); if (p == NULL) return NULL; - + // set p to the aligned part in the full region - // note: this is dangerous on Windows as VirtualFree needs the actual region pointer - // but in mi_os_mem_free we handle this (hopefully exceptional) situation. 
+ // note: this is dangerous on Windows as VirtualFree needs the actual base pointer + // this is handled though by having the `base` field in the memid's + *base = p; // remember the base p = mi_align_up_ptr(p, alignment); // explicitly commit only the aligned part @@ -259,6 +269,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // overallocate... p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); if (p == NULL) return NULL; + // and selectively unmap parts around the over-allocated area. (noop on sbrk) void* aligned_p = mi_align_up_ptr(p, alignment); size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; @@ -269,10 +280,11 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit if (post_size > 0) mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; + *base = aligned_p; // since we freed the pre part, `*base == p`. } } - mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); + mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0)); return p; } @@ -291,8 +303,7 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) { bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); if (p != NULL) { - *memid = _mi_memid_create_os(true, os_is_zero); - memid->is_pinned = os_is_large; + *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); } return p; } @@ -308,11 +319,13 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo bool os_is_large = false; bool os_is_zero = false; - void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ ); + void* os_base = NULL; + void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ ); if (p != NULL) { - *memid = _mi_memid_create_os(commit, os_is_zero); - memid->is_pinned = os_is_large; - } + *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + memid->mem.os.alignment = alignment; + } return p; } @@ -340,7 +353,7 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse const size_t oversize = size + extra; void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats); if (start == NULL) return NULL; - memid->mem.os.base = start; + void* const p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); // decommit the overallocation at the start @@ -615,9 +628,9 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } if (page != 0) { mi_assert(start != NULL); - *memid = _mi_memid_create_os(true, all_zero); + *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; - memid->is_pinned = true; + mi_assert(memid->is_pinned); } return (page == 0 ? 
NULL : start); } From b549c88e6c85d4a1057a9ccf15ac35ebf38e9366 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 19:48:40 -0700 Subject: [PATCH 076/102] review realloc --- src/alloc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index da647ca6..3ee4dad1 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -700,14 +700,12 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); + _mi_memzero((uint8_t*)newp + start, newsize - start); } if mi_likely(p != NULL) { - if mi_likely(_mi_is_aligned(p, sizeof(uintptr_t))) { // a client may pass in an arbitrary pointer `p`.. - const size_t copysize = (newsize > size ? size : newsize); - mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. - _mi_memcpy_aligned(newp, p, copysize); - } + const size_t copysize = (newsize > size ? size : newsize); + mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. + _mi_memcpy(newp, p, copysize); mi_free(p); // only free the original pointer if successful } } From 4cb5b45178835c9df706194edf837d02a64efc6f Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 21 Apr 2023 09:37:05 -0700 Subject: [PATCH 077/102] fix possible underflow (issue #731) --- src/segment.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index 41c28065..359bd258 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1273,9 +1273,12 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); if (segment->allow_decommit && page->is_committed) { - const size_t usize = mi_usable_size(block) - sizeof(mi_block_t); - uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_reset(p, usize, &_mi_stats_main); + size_t usize = mi_usable_size(block); + if (usize > sizeof(mi_block_t)) { + usize = usize - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_reset(p, usize, &_mi_stats_main); + } } } #endif From 3bc577004ad0ad348f69e8926244d2b9d69b1863 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 21 Apr 2023 09:37:25 -0700 Subject: [PATCH 078/102] clarify return codes of VirtualAlloc (issue #731) --- src/prim/windows/prim.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 1544c641..61532737 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -261,7 +261,7 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la int _mi_prim_commit(void* addr, size_t size) { void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); - return (p == addr ? 0 : (int)GetLastError()); + return (p != NULL ? 
0 : (int)GetLastError()); } int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { @@ -274,11 +274,11 @@ int _mi_prim_reset(void* addr, size_t size) { void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == addr); #if 1 - if (p == addr && addr != NULL) { + if (p != NULL) { VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory from the working set } #endif - return (p == addr ? 0 : (int)GetLastError()); + return (p != NULL ? 0 : (int)GetLastError()); } int _mi_prim_protect(void* addr, size_t size, bool protect) { From 012f7164851acedfb9ae20d2909c7d402b2f4974 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 21 Apr 2023 10:37:22 -0700 Subject: [PATCH 079/102] add is_zero flag to prim_commit --- include/mimalloc/prim.h | 3 ++- src/os.c | 11 ++++++++--- src/prim/unix/prim.c | 3 ++- src/prim/wasi/prim.c | 3 ++- src/prim/windows/prim.c | 18 +++++++++++++++--- 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 40f5d2d7..9e560696 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -47,7 +47,8 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la // Commit memory. Returns error code or 0 on success. // For example, on Linux this would make the memory PROT_READ|PROT_WRITE. -int _mi_prim_commit(void* addr, size_t size); +// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) +int _mi_prim_commit(void* addr, size_t size, bool* is_zero); // Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true // if the memory would need to be re-committed. For example, on Windows this is always true, diff --git a/src/os.c b/src/os.c index f243f7a4..1b435335 100644 --- a/src/os.c +++ b/src/os.c @@ -405,12 +405,17 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats if (csize == 0) return true; // commit - int err = _mi_prim_commit(start, csize); + bool os_is_zero = false; + int err = _mi_prim_commit(start, csize, &os_is_zero); if (err != 0) { _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + return false; } - mi_assert_internal(err == 0); - return (err == 0); + if (os_is_zero && is_zero != NULL) { + *is_zero = true; + mi_assert_expensive(mi_mem_is_zero(start, csize)); + } + return true; } static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) { diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 4349f578..9a542d3e 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -346,8 +346,9 @@ static void unix_mprotect_hint(int err) { -int _mi_prim_commit(void* start, size_t size) { +int _mi_prim_commit(void* start, size_t size, bool* is_zero) { // commit: ensure we can access the area + *is_zero = false; int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } unix_mprotect_hint(err); diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index bf78a258..50511f0b 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -128,8 +128,9 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la // Commit/Reset/Protect //--------------------------------------------- -int _mi_prim_commit(void* addr, size_t size) { +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { MI_UNUSED(addr); MI_UNUSED(size); + *is_zero = false; return 0; } diff --git 
a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 61532737..bde48a7d 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -259,14 +259,26 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la #pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) #endif -int _mi_prim_commit(void* addr, size_t size) { +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { + *is_zero = false; + /* + // zero'ing only happens on an initial commit... but checking upfront seems expensive.. + _MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo); + if (VirtualQuery(addr, &meminfo, size) > 0) { + if ((meminfo.State & MEM_COMMIT) == 0) { + *is_zero = true; + } + } + */ + // commit void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); - return (p != NULL ? 0 : (int)GetLastError()); + if (p == NULL) return (int)GetLastError(); + return 0; } int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); - *needs_recommit = true; // for safetly, assume always decommitted even in the case of an error. + *needs_recommit = true; // for safety, assume always decommitted even in the case of an error. return (ok ? 0 : (int)GetLastError()); } From e47adc2d22ddcf9201c4110bc7c87bbb4cd000e2 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 21 Apr 2023 11:33:50 -0700 Subject: [PATCH 080/102] track objects in heap destroy for ETW --- include/mimalloc/track.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/track.h b/include/mimalloc/track.h index f78e8daa..9545f750 100644 --- a/include/mimalloc/track.h +++ b/include/mimalloc/track.h @@ -79,7 +79,7 @@ defined, undefined, or not accessible at all: // windows event tracing #define MI_TRACK_ENABLED 1 -#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_HEAP_DESTROY 1 #define MI_TRACK_TOOL "ETW" #define WIN32_LEAN_AND_MEAN From e90ad987cca3fd7f86d6b4a3d32e30681e715dec Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 22 Apr 2023 10:18:18 -0700 Subject: [PATCH 081/102] add zero initialization on zero sized reallocation (issue #725) --- src/alloc.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 3ee4dad1..b17fdbdc 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -37,6 +37,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz page->used++; page->free = mi_block_next(page, block); mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); + #if MI_DEBUG>3 + if (page->free_is_zero) { + mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block))); + } + #endif // allow use of the block internally // note: when tracking we need to avoid ever touching the MI_PADDING since @@ -53,7 +58,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } else { _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE); - } + } } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN @@ -116,6 +121,11 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif return p; } @@ -145,6 +155,11 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif + #if MI_DEBUG>3 + 
if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif return p; } } @@ -702,6 +717,9 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); _mi_memzero((uint8_t*)newp + start, newsize - start); } + else if (newsize == 0) { + ((uint8_t*)newp)[0] = 0; // work around for applications that expect zero-reallocation to be zero initialized (issue #725) + } if mi_likely(p != NULL) { const size_t copysize = (newsize > size ? size : newsize); mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. From 2a6352fef236a0b1cf761a0a0ad92aeea7bbc331 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 22 Apr 2023 10:18:57 -0700 Subject: [PATCH 082/102] add further zero initialization checks --- ide/vs2022/mimalloc-test-api.vcxproj | 7 +++++++ ide/vs2022/mimalloc.vcxproj | 1 - ide/vs2022/mimalloc.vcxproj.filters | 3 --- src/alloc-aligned.c | 14 ++++---------- src/page.c | 8 ++++++++ test/test-api.c | 23 +++++++++++++++++++++++ 6 files changed, 42 insertions(+), 14 deletions(-) diff --git a/ide/vs2022/mimalloc-test-api.vcxproj b/ide/vs2022/mimalloc-test-api.vcxproj index 6023c251..d9b9cae4 100644 --- a/ide/vs2022/mimalloc-test-api.vcxproj +++ b/ide/vs2022/mimalloc-test-api.vcxproj @@ -141,7 +141,14 @@ + + true + true + true + true + + false diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 77a1711b..a02a8393 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -243,7 +243,6 @@ - diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters index 28ba20b1..b3cdb3b3 100644 --- a/ide/vs2022/mimalloc.vcxproj.filters +++ b/ide/vs2022/mimalloc.vcxproj.filters @@ -69,9 +69,6 @@ Headers - - Headers - Headers diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index be62efda..0907811e 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -218,19 +218,13 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne return p; // reallocation still fits, is aligned and not more than 50% waste } else { + // note: we don't zero allocate upfront so we only zero initialize the expanded part void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); if (newp != NULL) { if (zero && newsize > size) { - const mi_page_t* page = _mi_ptr_page(newp); - if (page->free_is_zero) { - // already zero initialized - mi_assert_expensive(mi_mem_is_zero(newp,newsize)); - } - else { - // also set last word in the previous allocation to zero to ensure any padding is zero-initialized - size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); - } + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + _mi_memzero((uint8_t*)newp + start, newsize - start); } _mi_memcpy_aligned(newp, p, (newsize > size ? 
size : newsize)); mi_free(p); // only free if successful diff --git a/src/page.c b/src/page.c index 93636b0e..5fefc3b5 100644 --- a/src/page.c +++ b/src/page.c @@ -66,6 +66,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { if (p < start || p >= end) return false; p = mi_block_next(page, p); } +#if MI_DEBUG>3 // generally too expensive to check this + if (page->free_is_zero) { + const size_t ubsize = mi_page_usable_block_size(page); + for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); + } + } +#endif return true; } diff --git a/test/test-api.c b/test/test-api.c index 829d7d35..8dd24e1b 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -46,6 +46,14 @@ bool test_heap2(void); bool test_stl_allocator1(void); bool test_stl_allocator2(void); +bool mem_is_zero(uint8_t* p, size_t size) { + if (p==NULL) return false; + for (size_t i = 0; i < size; ++i) { + if (p[i] != 0) return false; + } + return true; +} + // --------------------------------------------------------------------------- // Main testing // --------------------------------------------------------------------------- @@ -232,6 +240,21 @@ int main(void) { } result = ok; }; + CHECK_BODY("zalloc-aligned-small1") { + size_t zalloc_size = MI_SMALL_SIZE_MAX / 2; + uint8_t* p = (uint8_t*)mi_zalloc_aligned(zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = mem_is_zero(p, zalloc_size); + mi_free(p); + }; + CHECK_BODY("rezalloc_aligned-small1") { + size_t zalloc_size = MI_SMALL_SIZE_MAX / 2; + uint8_t* p = (uint8_t*)mi_zalloc_aligned(zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = mem_is_zero(p, zalloc_size); + zalloc_size *= 3; + p = (uint8_t*)mi_rezalloc_aligned(p, zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = result && mem_is_zero(p, zalloc_size); + mi_free(p); + }; // --------------------------------------------------- // Reallocation From bdead26c7743764f8e49d5a0a7e3e87dac1580fc Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 22 Apr 2023 17:26:51 -0700 Subject: [PATCH 083/102] improve static alloc precision --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 04d5c49c..9ae78469 100644 --- a/src/arena.c +++ b/src/arena.c @@ -146,7 +146,7 @@ static _Atomic(size_t) mi_arena_static_top; static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { *memid = _mi_memid_none(); if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; - if (mi_atomic_load_relaxed(&mi_arena_static_top) >= MI_ARENA_STATIC_MAX) return NULL; + if ((mi_atomic_load_relaxed(&mi_arena_static_top) + size) > MI_ARENA_STATIC_MAX) return NULL; // try to claim space if (alignment == 0) { alignment = 1; } From d81cc76bbc975fdca88ee81d770af989152967b0 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 22 Apr 2023 18:44:00 -0700 Subject: [PATCH 084/102] simplifying arena code --- src/arena.c | 74 ++++++++++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 44 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9ae78469..39151070 100644 --- a/src/arena.c +++ b/src/arena.c @@ -209,12 +209,11 @@ static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index Arena Allocation ----------------------------------------------------------- */ -static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool commit, mi_arena_id_t req_arena_id, mi_memid_t* memid, 
mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool commit, mi_memid_t* memid, mi_os_tld_t* tld) { MI_UNUSED(arena_index); mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); - if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; mi_bitmap_index_t bitmap_index; if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL; @@ -263,24 +262,29 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_ } // allocate in a speficic arena -static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, int numa_node, size_t size, size_t alignment, - bool commit, bool allow_large, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) +static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); const size_t bcount = mi_block_count_of_size(size); const size_t arena_index = mi_arena_id_index(arena_id); - mi_assert_internal(arena_index < max_arena); + mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); mi_assert_internal(size <= mi_arena_block_size(bcount)); - if (arena_index >= max_arena) return NULL; - - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + + // Check arena suitability + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; - if (arena->numa_node >= 0 && arena->numa_node != numa_node) return NULL; if (!allow_large && arena->is_large) return NULL; - return mi_arena_alloc_at(arena, arena_index, bcount, commit, req_arena_id, memid, tld); + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node || req_arena_id != _mi_arena_id_none()); + if (match_numa_node) { if (!numa_suitable) return NULL; } + else { if (numa_suitable) return NULL; } + + // try to allocate + void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, memid, tld); + mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment)); + return p; } @@ -298,40 +302,22 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t size_t arena_index = mi_arena_id_index(req_arena_id); if (arena_index < MI_MAX_ARENAS && arena_index < max_arena) { - // try a specific arena if requested - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); - if ((arena != NULL) && - // (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? 
- (allow_large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages - { - void* p = mi_arena_alloc_at(arena, arena_index, bcount, commit, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } + // try a specific arena if requested + void* p = mi_arena_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; } else { + mi_assert_internal(req_arena_id == _mi_arena_id_none()); // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); - if (arena != NULL && - (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (allow_large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages - { - void* p = mi_arena_alloc_at(arena, i, bcount, commit, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; } // try from another numa node instead.. - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); - if (arena != NULL && - (arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! - (allow_large || !arena->is_large)) // large OS pages allowed, or the arena does not consist of large OS pages - { - void* p = mi_arena_alloc_at(arena, i, bcount, commit, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); + if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } @@ -339,10 +325,10 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t return NULL; } -// try to reserve a fresh arena +// try to reserve a fresh arena space static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) { - if (_mi_preloading()) return false; + if (_mi_preloading()) return false; // use OS only while pre loading if (req_arena_id != _mi_arena_id_none()) return false; const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); @@ -387,7 +373,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_arena_id_t arena_id = 0; if (mi_arena_reserve(size,allow_large,req_arena_id,&arena_id)) { // and try allocate in there - p = mi_arena_alloc_at_id(arena_id, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + p = mi_arena_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } From 4dcd611ec40697bcb58ee89d89b1d6766ea2023a Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 22 Apr 2023 18:47:19 -0700 Subject: [PATCH 085/102] simplifying arena code --- src/arena.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index 39151070..9219a841 100644 --- a/src/arena.c +++ 
b/src/arena.c @@ -300,14 +300,14 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t if mi_likely(max_arena == 0) return NULL; mi_assert_internal(size <= mi_arena_block_size(bcount)); - size_t arena_index = mi_arena_id_index(req_arena_id); - if (arena_index < MI_MAX_ARENAS && arena_index < max_arena) { + if (req_arena_id != _mi_arena_id_none()) { // try a specific arena if requested - void* p = mi_arena_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); - if (p != NULL) return p; + if (mi_arena_id_index(req_arena_id) < max_arena) { + void* p = mi_arena_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + } } else { - mi_assert_internal(req_arena_id == _mi_arena_id_none()); // try numa affine allocation for (size_t i = 0; i < max_arena; i++) { void* p = mi_arena_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); @@ -373,6 +373,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_arena_id_t arena_id = 0; if (mi_arena_reserve(size,allow_large,req_arena_id,&arena_id)) { // and try allocate in there + mi_assert_internal(req_arena_id == _mi_arena_id_none()); p = mi_arena_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } From 0e1670a4fbebdb3dd8dd4892f7c9cd7d076abc4e Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 22 Apr 2023 21:32:25 -0700 Subject: [PATCH 086/102] refactor arena code --- src/arena.c | 87 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9219a841..1f7657eb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,6 +188,10 @@ static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_ } } +static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { + return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex))); +} + /* ----------------------------------------------------------- Thread safe allocation in an arena @@ -219,7 +223,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! 
- void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); + void* p = mi_arena_block_start(arena, bitmap_index); *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); memid->is_pinned = arena->memid.is_pinned; @@ -262,8 +266,8 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar } // allocate in a speficic arena -static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, - bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) +static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); @@ -276,10 +280,12 @@ static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; if (!allow_large && arena->is_large) return NULL; - if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; - const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node || req_arena_id != _mi_arena_id_none()); - if (match_numa_node) { if (!numa_suitable) return NULL; } - else { if (numa_suitable) return NULL; } + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + if (req_arena_id == _mi_arena_id_none()) { // in not specific, check numa affinity + const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); + if (match_numa_node) { if (!numa_suitable) return NULL; } + else { if (numa_suitable) return NULL; } + } // try to allocate void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, memid, tld); @@ -289,9 +295,9 @@ static void* mi_arena_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, // allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t alignment, - bool commit, bool allow_large, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) +static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); @@ -303,21 +309,21 @@ static mi_decl_noinline void* mi_arenas_alloc(int numa_node, size_t size, size_t if (req_arena_id != _mi_arena_id_none()) { // try a specific arena if requested if (mi_arena_id_index(req_arena_id) < max_arena) { - void* p = mi_arena_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } else { // try numa affine allocation for (size_t i = 0; i < max_arena; i++) { - void* p = mi_arena_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } // try from 
another numa node instead.. if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already for (size_t i = 0; i < max_arena; i++) { - void* p = mi_arena_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } @@ -366,17 +372,19 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - void* p = mi_arenas_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; // otherwise, try to first eagerly reserve a new arena - mi_arena_id_t arena_id = 0; - if (mi_arena_reserve(size,allow_large,req_arena_id,&arena_id)) { - // and try allocate in there - mi_assert_internal(req_arena_id == _mi_arena_id_none()); - p = mi_arena_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); - if (p != NULL) return p; - } + if (req_arena_id == _mi_arena_id_none()) { + mi_arena_id_t arena_id = 0; + if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { + // and try allocate in there + mi_assert_internal(req_arena_id == _mi_arena_id_none()); + p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + } + } } // if we cannot use OS allocation, return NULL @@ -415,13 +423,18 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { Arena purge ----------------------------------------------------------- */ +static long mi_arena_purge_delay(void) { + return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult)); +} + // reset or decommit in an arena and update the committed/decommit bitmaps +// assumes we own the area (i.e. 
blocks_in_use is claimed by us) static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); mi_assert_internal(!arena->memid.is_pinned); const size_t size = mi_arena_block_size(blocks); - void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx)); + void* const p = mi_arena_block_start(arena, bitmap_idx); bool needs_recommit; if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { // all blocks are committed, we can purge freely @@ -430,7 +443,8 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, else { // some blocks are not committed -- this can happen when a partially committed block is freed // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge - // we need to ensure we do not try to reset, and also not count the decommit stats (as it was already adjusted) + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), + // and also undo the decommit stats (as it was already adjusted) mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); _mi_stat_increase(&stats->committed, size); @@ -445,11 +459,12 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, } // Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. +// Note: assumes we (still) own the area as we may purge immediately static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_purge != NULL); if (!mi_option_is_enabled(mi_option_allow_purge)) return; - const long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); + const long delay = mi_arena_purge_delay(); if (_mi_preloading() || delay == 0) { // decommit directly mi_arena_purge(arena, bitmap_idx, blocks, stats); @@ -467,20 +482,23 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t } } +// purge a range of blocks // return true if the full range was purged. +// assumes we own the area (i.e. 
blocks_in_use is claimed by us) static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) { const size_t endidx = startidx + bitlen; size_t bitidx = startidx; bool all_purged = false; while (bitidx < endidx) { + // count consequetive ones in the purge mask size_t count = 0; while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) { count++; } if (count > 0) { // found range to be purged - const mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(idx, bitidx); - mi_arena_purge(arena, bitmap_idx, count, stats); + const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx); + mi_arena_purge(arena, range_idx, count, stats); if (count == bitlen) { all_purged = true; } @@ -509,7 +527,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi if (purge != 0) { size_t bitidx = 0; while (bitidx < MI_BITMAP_FIELD_BITS) { - // find length 1 bit range + // find consequetive range of ones in the purge mask size_t bitlen = 0; while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) { bitlen++; @@ -522,7 +540,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi } bitlen--; } - // claimed count bits at in_use + // actual claimed bits at `in_use` if (bitlen > 0) { // read purge again now that we have the in_use bits purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); @@ -530,7 +548,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi full_purge = false; } any_purged = true; - // release claimed in_use bits again + // release the claimed `in_use` bits again _mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index); } bitidx += (bitlen+1); // +1 to skip the zero (or end) @@ -539,7 +557,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi } // if not fully purged, make sure to purge again in the future if (!full_purge) { - const long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); + const long delay = mi_arena_purge_delay(); mi_msecs_t expected = 0; mi_atomic_casi64_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay); } @@ -547,8 +565,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi } static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { - const long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); - if (_mi_preloading() || delay == 0 || !mi_option_is_enabled(mi_option_allow_purge)) return; // nothing will be scheduled + if (_mi_preloading() || !mi_option_is_enabled(mi_option_allow_purge) || mi_arena_purge_delay() == 0) return; // nothing will be scheduled const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); if (max_arena == 0) return; @@ -586,7 +603,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through if (!all_committed && committed_size > 0) { - // if partially committed, adjust the committed stats + // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) _mi_stat_decrease(&stats->committed, committed_size); } _mi_os_free(p, size, memid, stats); @@ -643,13 +660,13 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi // and make 
it available to others again bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!all_inuse) { - _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); + _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size); return; }; } else { // arena was none, external, or static; nothing to do - mi_assert_internal(memid.memkind <= MI_MEM_STATIC); + mi_assert_internal(memid.memkind < MI_MEM_OS); } // purge expired decommits From a636d88d99b490f9652a1411fc4e685f66520004 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 22 Apr 2023 21:52:49 -0700 Subject: [PATCH 087/102] remove unused local --- src/arena.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1f7657eb..ec5e5d6f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -302,10 +302,8 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz MI_UNUSED(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - const size_t bcount = mi_block_count_of_size(size); if mi_likely(max_arena == 0) return NULL; - mi_assert_internal(size <= mi_arena_block_size(bcount)); - + if (req_arena_id != _mi_arena_id_none()) { // try a specific arena if requested if (mi_arena_id_index(req_arena_id) < max_arena) { From e68010573fb2b4149b48324bf305a1f13e490750 Mon Sep 17 00:00:00 2001 From: Trout Zhang Date: Sun, 23 Apr 2023 21:39:20 +0800 Subject: [PATCH 088/102] Fix ATOMIC_VAR_INIT deprecation warnings for C17 --- include/mimalloc/atomic.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index d0226029..bd95dd5f 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -39,7 +39,11 @@ terms of the MIT license. A copy of the license can be found in the file #include #define mi_atomic(name) atomic_##name #define mi_memory_order(name) memory_order_##name -#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201710L) // c17, see issue #735 + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif #endif // Various defines for all used memory orders in mimalloc From 67dcbbfa43bf3d9300b4880fea0296cbb4f58a2a Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 12:46:38 -0700 Subject: [PATCH 089/102] refine c17 check for ATOMIC_VAR_INIT --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index bd95dd5f..fe418fab 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -39,7 +39,7 @@ terms of the MIT license. 
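(As an aside on the `MI_ATOMIC_VAR_INIT` change above: a minimal usage sketch, where `example_count` is a hypothetical name rather than something from the sources. It shows why the macro keeps static atomics compiling cleanly both on older C11 libraries and under C17, where `ATOMIC_VAR_INIT` is deprecated.)

  #include <stdatomic.h>
  #include <stddef.h>

  #if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L)   // same guard as in the patch above
    #define MI_ATOMIC_VAR_INIT(x)  x                    // C17: plain initialization suffices
  #else
    #define MI_ATOMIC_VAR_INIT(x)  ATOMIC_VAR_INIT(x)   // older C11 libraries may still require the macro
  #endif

  // hypothetical example: initializes the same way regardless of the language standard
  static _Atomic(size_t) example_count = MI_ATOMIC_VAR_INIT(0);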
A copy of the license can be found in the file #include #define mi_atomic(name) atomic_##name #define mi_memory_order(name) memory_order_##name -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201710L) // c17, see issue #735 +#if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L) // c17, see issue #735 #define MI_ATOMIC_VAR_INIT(x) x #else #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) From a59ae585c785b18cdf3473a9d04ae8fccc709eee Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 13:04:50 -0700 Subject: [PATCH 090/102] remove allow_purge option --- include/mimalloc.h | 41 ++++++++++++++++++++--------------------- src/arena.c | 7 ++++--- src/options.c | 1 - src/os.c | 6 +++--- src/segment.c | 4 ++-- 5 files changed, 29 insertions(+), 30 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 9cc2afba..368c22cc 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -319,32 +319,31 @@ typedef enum mi_option_e { mi_option_show_stats, // print statistics on termination mi_option_verbose, // print verbose messages // the following options are experimental (see src/options.h) - mi_option_eager_commit, - mi_option_arena_eager_commit, - mi_option_purge_decommits, - mi_option_allow_large_os_pages, // enable large (2MiB) OS pages, implies eager commit - mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup + mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1) + mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2) + mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1) + mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node - mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup + mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup mi_option_deprecated_segment_cache, mi_option_deprecated_page_reset, - mi_option_abandoned_page_purge, - mi_option_deprecated_segment_reset, - mi_option_eager_commit_delay, - mi_option_purge_delay, - mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. - mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) - mi_option_os_tag, - mi_option_max_errors, - mi_option_max_warnings, - mi_option_max_segment_reclaim, - mi_option_destroy_on_exit, - mi_option_arena_reserve, - mi_option_arena_purge_mult, - mi_option_allow_purge, + mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination + mi_option_deprecated_segment_reset, + mi_option_eager_commit_delay, + mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. + mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. 
+ mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) + mi_option_os_tag, // tag used for OS logging (macOS only for now) + mi_option_max_errors, // issue at most N error messages + mi_option_max_warnings, // issue at most N warning messages + mi_option_max_segment_reclaim, + mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe. + mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit) + mi_option_arena_purge_mult, mi_option_purge_extend_delay, _mi_option_last, - // legacy options + // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, mi_option_eager_region_commit = mi_option_arena_eager_commit, mi_option_reset_decommits = mi_option_purge_decommits, diff --git a/src/arena.c b/src/arena.c index ec5e5d6f..299cc231 100644 --- a/src/arena.c +++ b/src/arena.c @@ -422,6 +422,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { ----------------------------------------------------------- */ static long mi_arena_purge_delay(void) { + // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult)); } @@ -460,9 +461,9 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, // Note: assumes we (still) own the area as we may purge immediately static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_purge != NULL); - if (!mi_option_is_enabled(mi_option_allow_purge)) return; - const long delay = mi_arena_purge_delay(); + if (delay < 0) return; // is purging allowed at all? + if (_mi_preloading() || delay == 0) { // decommit directly mi_arena_purge(arena, bitmap_idx, blocks, stats); @@ -563,7 +564,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi } static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { - if (_mi_preloading() || !mi_option_is_enabled(mi_option_allow_purge) || mi_arena_purge_delay() == 0) return; // nothing will be scheduled + if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); if (max_arena == 0) return; diff --git a/src/options.c b/src/options.c index 3bbb18e8..f52af8b8 100644 --- a/src/options.c +++ b/src/options.c @@ -90,7 +90,6 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's - { 1, UNINIT, MI_OPTION(allow_purge) }, // allow decommit/reset to free (physical) memory back to the OS { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, }; diff --git a/src/os.c b/src/os.c index 1b435335..97a9f46d 100644 --- a/src/os.c +++ b/src/os.c @@ -473,14 +473,14 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // to be recommitted if it is to be re-used later on. bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) { - if (!mi_option_is_enabled(mi_option_allow_purge)) return false; + if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? _mi_stat_counter_increase(&stats->purge_calls, 1); _mi_stat_increase(&stats->purged, size); if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? 
!_mi_preloading()) // don't decommit during preloading (unsafe) { - bool needs_recommit; + bool needs_recommit = true; mi_os_decommit_ex(p, size, &needs_recommit, stats); return needs_recommit; } @@ -488,7 +488,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed _mi_os_reset(p, size, stats); } - return false; // not decommitted + return false; // needs no recommit } } diff --git a/src/segment.c b/src/segment.c index 359bd258..5635b45c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -347,7 +347,7 @@ static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge } static void mi_pages_try_purge(mi_segments_tld_t* tld) { - if (!mi_option_is_enabled(mi_option_allow_purge)) return; + if (mi_option_get(mi_option_purge_delay) < 0) return; // purging is not allowed mi_msecs_t now = _mi_clock_now(); mi_page_queue_t* pq = &tld->pages_purge; @@ -542,7 +542,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme mi_track_mem_undefined(segment, info_size); MI_UNUSED(info_size); segment->memid = memid; segment->allow_decommit = !memid.is_pinned; - segment->allow_purge = segment->allow_decommit && mi_option_is_enabled(mi_option_allow_purge); + segment->allow_purge = segment->allow_decommit && (mi_option_get(mi_option_purge_delay) >= 0); segment->segment_size = segment_size; mi_segments_track_size((long)(segment_size), tld); _mi_segment_map_allocated_at(segment); From d4fe700089b6caf1edaf558bf92eb7295cdd746f Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 13:13:29 -0700 Subject: [PATCH 091/102] remove spurious track_mem_defined --- src/arena.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 299cc231..3202eb8f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -261,7 +261,6 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar memid->was_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } - // mi_track_mem_undefined(p,mi_arena_block_size(needed_bcount)); return p; } From c9dd6f6dc96597763d1c5a89fa80f70fd0d168fa Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 16:06:58 -0700 Subject: [PATCH 092/102] fix mem tracking for asan --- src/os.c | 13 ++++++++----- src/segment.c | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/os.c b/src/os.c index 97a9f46d..84dcdfa1 100644 --- a/src/os.c +++ b/src/os.c @@ -204,8 +204,8 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo mi_assert_internal(is_zero != NULL); mi_assert_internal(is_large != NULL); if (size == 0) return NULL; - if (!commit) allow_large = false; - if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning + if (!commit) { allow_large = false; } + if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning *is_zero = false; void* p = NULL; @@ -216,7 +216,10 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); - if (commit) { _mi_stat_increase(&stats->committed, size); } + if (commit) { + _mi_stat_increase(&stats->committed, size); + mi_track_mem_defined(p,size); // seems needed for asan (or `mimalloc-test-api` fails) + } } return p; } @@ -276,8 
+279,8 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); - if (pre_size > 0) mi_os_prim_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit, stats); } + if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; *base = aligned_p; // since we freed the pre part, `*base == p`. diff --git a/src/segment.c b/src/segment.c index 5635b45c..4effe841 100644 --- a/src/segment.c +++ b/src/segment.c @@ -539,7 +539,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme } } - mi_track_mem_undefined(segment, info_size); MI_UNUSED(info_size); + MI_UNUSED(info_size); segment->memid = memid; segment->allow_decommit = !memid.is_pinned; segment->allow_purge = segment->allow_decommit && (mi_option_get(mi_option_purge_delay) >= 0); From 7f93bf02c9f0b062b1f2b54aedb486d9496eef13 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 16:40:01 -0700 Subject: [PATCH 093/102] be explicit about memory tracking in os.c --- src/arena.c | 4 ++-- src/os.c | 21 ++++++++++++++++++--- src/prim/unix/prim.c | 4 ++++ src/segment.c | 4 ++-- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/arena.c b/src/arena.c index 3202eb8f..3549a907 100644 --- a/src/arena.c +++ b/src/arena.c @@ -370,7 +370,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); - if (p != NULL) return p; + if (p != NULL) return p; // otherwise, try to first eagerly reserve a new arena if (req_arena_id == _mi_arena_id_none()) { @@ -396,7 +396,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset } else { return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); - } + } } void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) diff --git a/src/os.c b/src/os.c index 84dcdfa1..db4c85a1 100644 --- a/src/os.c +++ b/src/os.c @@ -155,6 +155,7 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) int err = _mi_prim_free(addr, size); + mi_track_mem_noaccess(addr,size); if (err != 0) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } @@ -218,7 +219,12 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo _mi_stat_increase(&stats->reserved, size); if (commit) { _mi_stat_increase(&stats->committed, size); - mi_track_mem_defined(p,size); // seems needed for asan (or `mimalloc-test-api` fails) + // seems needed for asan (or `mimalloc-test-api` fails) + if (*is_zero) { mi_track_mem_defined(p,size); } + else { 
mi_track_mem_undefined(p,size); } + } + else { + mi_track_mem_noaccess(p,size); } } return p; @@ -283,7 +289,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; - *base = aligned_p; // since we freed the pre part, `*base == p`. + *base = aligned_p; // since we freed the pre part, `*base == p`. } } @@ -417,7 +423,10 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats if (os_is_zero && is_zero != NULL) { *is_zero = true; mi_assert_expensive(mi_mem_is_zero(start, csize)); - } + } + // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails) + if (os_is_zero) { mi_track_mem_defined(start,csize); } + else { mi_track_mem_undefined(start,csize); } return true; } @@ -439,6 +448,7 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_ _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } mi_assert_internal(err == 0); + mi_track_mem_noaccess(start,csize); return (err == 0); } @@ -468,6 +478,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { if (err != 0) { _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } + mi_track_mem_undefined(start,csize); return (err == 0); } @@ -517,6 +528,8 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (err != 0) { _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); } + if (protect) { mi_track_mem_noaccess(start,csize); } + else { mi_track_mem_undefined(start,csize); } return (err == 0); } @@ -639,6 +652,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; mi_assert(memid->is_pinned); + if (all_zero) { mi_track_mem_defined(start,size); } + else { mi_track_mem_undefined(start,size); } } return (page == 0 ? NULL : start); } diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 9a542d3e..12b51f4f 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -190,7 +190,11 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p if (hint != NULL) { p = mmap(hint, size, protect_flags, flags, fd, 0); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? 
+ int err = 0; + #else int err = errno; + #endif _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); } if (p!=MAP_FAILED) return p; diff --git a/src/segment.c b/src/segment.c index 4effe841..99e1c412 100644 --- a/src/segment.c +++ b/src/segment.c @@ -536,9 +536,9 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme // commit failed; we cannot touch the memory: free the segment directly and return `NULL` _mi_arena_free(segment, segment_size, 0, memid, tld_os->stats); return NULL; - } + } } - + MI_UNUSED(info_size); segment->memid = memid; segment->allow_decommit = !memid.is_pinned; From 4b2f6448cb37a1fc4d2e965229484d8c431b5d09 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 19:45:29 -0700 Subject: [PATCH 094/102] fix asan memory tracking for zero initialized memory --- src/os.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/os.c b/src/os.c index db4c85a1..69ad2bf9 100644 --- a/src/os.c +++ b/src/os.c @@ -155,7 +155,6 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) int err = _mi_prim_free(addr, size); - mi_track_mem_noaccess(addr,size); if (err != 0) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } @@ -220,12 +219,11 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo if (commit) { _mi_stat_increase(&stats->committed, size); // seems needed for asan (or `mimalloc-test-api` fails) - if (*is_zero) { mi_track_mem_defined(p,size); } + #ifdef MI_TRACK_ASAN + if (*is_zero) { mi_track_mem_defined(p,size); } else { mi_track_mem_undefined(p,size); } - } - else { - mi_track_mem_noaccess(p,size); - } + #endif + } } return p; } @@ -425,8 +423,10 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats mi_assert_expensive(mi_mem_is_zero(start, csize)); } // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails) + #ifdef MI_TRACK_ASAN if (os_is_zero) { mi_track_mem_defined(start,csize); } - else { mi_track_mem_undefined(start,csize); } + else { mi_track_mem_undefined(start,csize); } + #endif return true; } @@ -448,7 +448,6 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_ _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } mi_assert_internal(err == 0); - mi_track_mem_noaccess(start,csize); return (err == 0); } @@ -478,7 +477,6 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { if (err != 0) { _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } - mi_track_mem_undefined(start,csize); return (err == 0); } @@ -528,8 +526,6 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (err != 0) { _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? 
"protect" : "unprotect"), err, err, start, csize); } - if (protect) { mi_track_mem_noaccess(start,csize); } - else { mi_track_mem_undefined(start,csize); } return (err == 0); } @@ -652,8 +648,9 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; mi_assert(memid->is_pinned); + #ifdef MI_TRACK_ASAN if (all_zero) { mi_track_mem_defined(start,size); } - else { mi_track_mem_undefined(start,size); } + #endif } return (page == 0 ? NULL : start); } From 95c2059e89b75da2507184123a9aee15921c0788 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 19:48:49 -0700 Subject: [PATCH 095/102] fix asan mem tracking --- src/segment.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/segment.c b/src/segment.c index 442b187a..a80c36ac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -877,18 +877,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi &segment_slices, &pre_size, &info_slices, commit, tld, os_tld); if (segment == NULL) return NULL; - // zero the segment info? -- not always needed as it may be zero initialized from the OS - ptrdiff_t ofs = offsetof(mi_segment_t, next); - size_t prefix = offsetof(mi_segment_t, slices) - ofs; - size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more + // zero the segment info? -- not always needed as it may be zero initialized from the OS if (!segment->memid.was_zero) { + ptrdiff_t ofs = offsetof(mi_segment_t, next); + size_t prefix = offsetof(mi_segment_t, slices) - ofs; + size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more _mi_memzero((uint8_t*)segment + ofs, zsize); } - else { - mi_track_mem_defined((uint8_t*)segment + ofs,zsize); - mi_assert(mi_mem_is_zero((uint8_t*)segment + ofs, zsize)); - } - // initialize the rest of the segment info const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices); From bfe2651ec9473739b5a3003d35205c160aa35359 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 20:32:22 -0700 Subject: [PATCH 096/102] revise decommit on linux in debug/secure mode --- src/prim/unix/prim.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 12b51f4f..04f02a3b 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -209,28 +209,33 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p return NULL; } +static int unix_mmap_fd(void) { + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) { os_tag = 100; } + return VM_MAKE_TAG(os_tag); + #else + return -1; + #endif +} + static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { - void* p = NULL; #if !defined(MAP_ANONYMOUS) #define MAP_ANONYMOUS MAP_ANON #endif #if !defined(MAP_NORESERVE) #define MAP_NORESERVE 0 #endif + void* p = NULL; + const int fd = unix_mmap_fd(); int flags = MAP_PRIVATE | MAP_ANONYMOUS; - int fd = -1; if (_mi_os_has_overcommit()) { flags |= MAP_NORESERVE; } #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) { os_tag = 100; } - fd = VM_MAKE_TAG(os_tag); - #endif // huge page allocation if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) { static _Atomic(size_t) large_page_try_ok; // = 0; @@ -363,21 +368,21 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; #if defined(MADV_DONTNEED) && !MI_DEBUG && !MI_SECURE // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) *needs_recommit = false; - err = unix_madvise(start, size, MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); #else - // decommit: just disable access (also used in debug and secure mode to trap on illegal access) - *needs_recommit = true; // needs recommit to reuse the memory - err = mprotect(start, size, PROT_NONE); - if (err != 0) { err = errno; } + // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss) + *needs_recommit = true; + const int fd = unix_mmap_fd(); + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } #endif return err; } int _mi_prim_reset(void* start, size_t size) { - // We always use MADV_DONTNEED even if it may be a bit more expensive as this - // guarantees that we see the actual rss reflected in tools like `top`. + // We always use MADV_DONTNEED if possible even if it may be a bit more expensive as MADV_FREE + // as this guarantees that we see the actual rss reflected in tools like `top`. 
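(A hedged sketch of the caller-side contract for the decommit primitive above: `_mi_prim_decommit` reports through `needs_recommit` whether the range must be committed again before it can be touched (for example after it was re-mapped or protected with `PROT_NONE`), while a plain `MADV_DONTNEED` leaves it accessible. `example_purge_and_reuse` is a hypothetical helper, not code from the tree.)

  static void example_purge_and_reuse(void* p, size_t size) {
    bool needs_recommit = false;
    if (_mi_prim_decommit(p, size, &needs_recommit) != 0) return;  // purge failed; memory is left as-is
    // ... later, when the range is about to be reused ...
    if (needs_recommit) {
      bool is_zero = false;
      _mi_prim_commit(p, size, &is_zero);   // re-enable access; contents may or may not be zeroed
    }
  }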
#if 0 && defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); From 88a8b13782b196547e223a8f42adcfbb2b168f67 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 20:53:27 -0700 Subject: [PATCH 097/102] revise unix decommit --- src/prim/unix/prim.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 04f02a3b..335c99e7 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -357,26 +357,36 @@ static void unix_mprotect_hint(int err) { int _mi_prim_commit(void* start, size_t size, bool* is_zero) { // commit: ensure we can access the area - *is_zero = false; + // note: we may think that *is_zero can be true since the memory + // was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but + // we sometimes call commit on a range with still partially committed + // memory and `mprotect` does not zero the range. + *is_zero = false; int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - unix_mprotect_hint(err); + if (err != 0) { + err = errno; + unix_mprotect_hint(err); + } return err; } int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { - int err = 0; - #if defined(MADV_DONTNEED) && !MI_DEBUG && !MI_SECURE - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + int err = 0; + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #if !MI_DEBUG && !MI_SECURE *needs_recommit = false; - err = unix_madvise(start, size, MADV_DONTNEED); #else - // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss) *needs_recommit = true; - const int fd = unix_mmap_fd(); - void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } + mprotect(start, size, PROT_NONE); #endif + /* + // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss) + *needs_recommit = true; + const int fd = unix_mmap_fd(); + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } + */ return err; } From 0ceef8e7287aba45129a86be379a55b3116bdc3a Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 24 Apr 2023 08:15:42 -0700 Subject: [PATCH 098/102] add fix for aligned_alloc override on musl; hopefully does not break Conda builds. issue #733 --- src/alloc-override.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index 40098ac5..873065dc 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -245,11 +245,13 @@ extern "C" { int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } // `aligned_alloc` is only available when __USE_ISOC11 is defined. + // Note: it seems __USE_ISOC11 is not defined in musl (and perhaps other libc's) so we only check + // for it if using glibc. // Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9. 
// Fortunately, in the case where `aligned_alloc` is declared as `static inline` it // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves. - #if __USE_ISOC11 + #if !defined(__GLIBC__) || __USE_ISOC11 void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } #endif #endif From 7e4e5450604d774178de70f79d72985db36cc93e Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 24 Apr 2023 08:50:37 -0700 Subject: [PATCH 099/102] update readme for options and upcoming release --- readme.md | 78 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/readme.md b/readme.md index 7af7a264..ecab8131 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.1.1` (2023-04-03). -Latest stable tag: `v1.8.1` (2023-04-03). +Latest release tag: `v2.1.2` (2023-04-24). +Latest stable tag: `v1.8.2` (2023-04-24). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -43,7 +43,7 @@ It also includes a robust way to override the default allocator in [Windows](#ov and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm. -- __eager page reset__: when a "page" becomes empty (with increased chance +- __eager page purging__: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused (reset or decommitted) reducing (real) memory pressure and fragmentation, especially in long running programs. @@ -78,6 +78,10 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity + by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory + usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. + * 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. * 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision @@ -105,20 +109,6 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page improved wasm support, faster aligned allocation, various small fixes. -* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including - M1), improved performance for v2 for large objects, Python integration improvements, more standard - installation directories, various small fixes. - -* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix - thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. 
- -* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). - -* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. - -* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, - improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. - * [Older release notes](#older-release-notes) Special thanks to: @@ -280,43 +270,48 @@ completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options -You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), -or via environment variables: +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables: - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. -- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS - that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) - programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs. - As an alternative, the `MIMALLOC_RESET_DELAY=` can be set higher (100ms by default) to make the page - reset occur less frequently instead of turning it off completely. + +Advanced options: + +- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge + OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which + can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when + a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher + value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times. + Setting it to `-1` disables purging completely. +- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc + allocates segments and pages. This is by default + only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS) + may improve performance. Note that eager commit only increases the commit but not the actual the peak resident set + (rss) so it is generally ok to enable this. + +Further options for large workloads and services: + - `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). 
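As a sketch of the equivalent programmatic configuration (the values are arbitrary examples, not recommended defaults), the same options can also be set through the options API before the first allocation:

    #include <mimalloc.h>

    int main(void) {
      mi_option_set(mi_option_purge_delay, 100);                  // purge unused OS pages after ~100ms
      mi_option_set_enabled(mi_option_arena_eager_commit, true);  // eagerly commit arena memory
      mi_option_set(mi_option_use_numa_nodes, 1);                 // act as if there is a single NUMA node
      void* p = mi_malloc(64);
      mi_free(p);
      return 0;
    }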
-- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly +- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that - can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). - -- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at startup and sometimes this can give a large (latency) performance improvement on big workloads. - Usually it is better to not use - `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large + OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived - and allocate just a little to take up space in the huge OS page area (which cannot be reset). + and allocate just a little to take up space in the huge OS page area (which cannot be purged). The huge pages are usually allocated evenly among NUMA nodes. We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all the huge pages at a specific numa node instead. @@ -794,6 +789,16 @@ provided by the bot. You will only need to do this once across all repos using o # Older Release Notes +* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including + M1), improved performance for v2 for large objects, Python integration improvements, more standard + installation directories, various small fixes. +* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix + thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. +* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). +* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. +* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, + improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. 
+ * 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call. * 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations, @@ -815,6 +820,7 @@ provided by the bot. You will only need to do this once across all repos using o more eager concurrent free, addition of STL allocator, fixed potential memory leak. * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. + * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. From 0bb5cecbc2850ee217c7348ed5f269883747892e Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 24 Apr 2023 08:56:42 -0700 Subject: [PATCH 100/102] use most performant reset on Linux (MADV_FREE) and Windows (MEM_RESET) as we use the precise decommit by default for purging anyways --- src/prim/unix/prim.c | 8 +++++--- src/prim/windows/prim.c | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 335c99e7..b64f0173 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -391,9 +391,11 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { } int _mi_prim_reset(void* start, size_t size) { - // We always use MADV_DONTNEED if possible even if it may be a bit more expensive as MADV_FREE - // as this guarantees that we see the actual rss reflected in tools like `top`. - #if 0 && defined(MADV_FREE) + // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + // will not reduce the `rss` stats in tools like `top` even though the memory is available + // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by + // default `MADV_DONTNEED` is used though. + #if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index bde48a7d..e6b61079 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -285,9 +285,9 @@ int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { int _mi_prim_reset(void* addr, size_t size) { void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == addr); - #if 1 + #if 0 if (p != NULL) { - VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set } #endif return (p != NULL ? 
0 : (int)GetLastError()); From 632edf999632ff50bdfd89b3c287161da8b835de Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 24 Apr 2023 09:03:43 -0700 Subject: [PATCH 101/102] rename was_zero/committed to initially_zero/committed --- include/mimalloc/internal.h | 4 ++-- include/mimalloc/types.h | 20 ++++++++++---------- src/arena.c | 20 ++++++++++---------- src/init.c | 2 +- src/segment.c | 10 +++++----- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index f3837973..4dabe8ba 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -673,8 +673,8 @@ static inline mi_memid_t _mi_memid_none(void) { static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { mi_memid_t memid = _mi_memid_create(MI_MEM_OS); - memid.was_committed = committed; - memid.was_zero = is_zero; + memid.initially_committed = committed; + memid.initially_zero = is_zero; memid.is_pinned = is_large; return memid; } diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index cd207abd..06b96587 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -340,24 +340,24 @@ static inline bool mi_memkind_is_os(mi_memkind_t memkind) { } typedef struct mi_memid_os_info { - void* base; // actual base address of the block (used for offset aligned allocations) - size_t alignment; // alignment at allocation + void* base; // actual base address of the block (used for offset aligned allocations) + size_t alignment; // alignment at allocation } mi_memid_os_info_t; typedef struct mi_memid_arena_info { - size_t block_index; // index in the arena - mi_arena_id_t id; // arena id (>= 1) - bool is_exclusive; // the arena can only be used for specific arena allocations + size_t block_index; // index in the arena + mi_arena_id_t id; // arena id (>= 1) + bool is_exclusive; // the arena can only be used for specific arena allocations } mi_memid_arena_info_t; typedef struct mi_memid_s { union { - mi_memid_os_info_t os; // only used for MI_MEM_OS - mi_memid_arena_info_t arena;// only used for MI_MEM_ARENA + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA } mem; - bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) - bool was_committed; // `true` if the memory was originally allocated as committed - bool was_zero; // `true` if the memory was originally zero initialized + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) + bool initially_committed;// `true` if the memory was originally allocated as committed + bool initially_zero; // `true` if the memory was originally zero initialized mi_memkind_t memkind; } mi_memid_t; diff --git a/src/arena.c b/src/arena.c index 3549a907..7616bdf8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -234,31 +234,31 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar } // set the dirty bits (todo: no need for an atomic op here?) 
- memid->was_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); // set commit state if (arena->blocks_committed == NULL) { // always committed - memid->was_committed = true; + memid->initially_committed = true; } else if (commit) { // commit requested, but the range may not be committed as a whole: ensure it is committed now - memid->was_committed = true; + memid->initially_committed = true; bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { bool commit_zero = false; if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) { - memid->was_committed = false; + memid->initially_committed = false; } else { - if (commit_zero) { memid->was_zero = true; } + if (commit_zero) { memid->initially_zero = true; } } } } else { // no need to commit, but check if already fully committed - memid->was_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } return p; @@ -747,7 +747,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int if (size < MI_ARENA_BLOCK_SIZE) return false; if (is_large) { - mi_assert_internal(memid.was_committed && memid.is_pinned); + mi_assert_internal(memid.initially_committed && memid.is_pinned); } const size_t bcount = size / MI_ARENA_BLOCK_SIZE; @@ -776,7 +776,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap // initialize committed bitmap? 
- if (arena->blocks_committed != NULL && arena->memid.was_committed) { + if (arena->blocks_committed != NULL && arena->memid.initially_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning } @@ -794,8 +794,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); - memid.was_committed = is_committed; - memid.was_zero = is_zero; + memid.initially_committed = is_committed; + memid.initially_zero = is_zero; memid.is_pinned = is_large; return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id); } diff --git a/src/init.c b/src/init.c index cd2fd643..c9aa4b0f 100644 --- a/src/init.c +++ b/src/init.c @@ -218,7 +218,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { } if (td != NULL) { td->memid = memid; - is_zero = memid.was_zero; + is_zero = memid.initially_zero; } } diff --git a/src/segment.c b/src/segment.c index 99e1c412..6798bb66 100644 --- a/src/segment.c +++ b/src/segment.c @@ -201,7 +201,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* // and protect the last (or only) page too mi_assert_internal(MI_SECURE <= 1 || segment->page_kind >= MI_PAGE_LARGE); uint8_t* start = (uint8_t*)segment + segment->segment_size - os_psize; - if (protect && !segment->memid.was_committed) { + if (protect && !segment->memid.initially_committed) { if (protect) { // ensure secure page is committed if (_mi_os_commit(start, os_psize, NULL, tld->stats)) { // if this fails that is ok (as it is an unaccessible page) @@ -528,7 +528,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme return NULL; // failed to allocate } - if (!memid.was_committed) { + if (!memid.initially_committed) { // ensure the initial info is committed mi_assert_internal(!memid.is_pinned); bool ok = _mi_os_commit(segment, pre_size, NULL, tld_os->stats); @@ -586,7 +586,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, init_commit, init_segment_size, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - mi_assert_internal(segment->memid.is_pinned ? segment->memid.was_committed : true); + mi_assert_internal(segment->memid.is_pinned ? 
segment->memid.initially_committed : true); mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan @@ -598,8 +598,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, for (size_t i = 0; i < capacity; i++) { mi_assert_internal(i <= 255); segment->pages[i].segment_idx = (uint8_t)i; - segment->pages[i].is_committed = segment->memid.was_committed; - segment->pages[i].is_zero_init = segment->memid.was_zero; + segment->pages[i].is_committed = segment->memid.initially_committed; + segment->pages[i].is_zero_init = segment->memid.initially_zero; } // initialize From b66e3214d8a104669c2ec05ae91ebc26a8f5ab78 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 24 Apr 2023 09:08:12 -0700 Subject: [PATCH 102/102] be more strict on initially_zero for arena allocation --- src/arena.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 7616bdf8..be1a9ebe 100644 --- a/src/arena.c +++ b/src/arena.c @@ -234,7 +234,9 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar } // set the dirty bits (todo: no need for an atomic op here?) - memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + if (arena->memid.initially_zero && arena->blocks_dirty != NULL) { + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + } // set commit state if (arena->blocks_committed == NULL) {
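(A hedged usage sketch for the renamed `memid` fields: a caller checks `initially_committed`/`initially_zero` to decide whether extra commit or zeroing work is still needed. The fragment below is a hypothetical caller; the `tld` argument is assumed to be available from the surrounding context.)

  mi_memid_t memid;
  void* p = _mi_arena_alloc(MI_ARENA_MIN_OBJ_SIZE, true /* commit */, false /* allow_large */,
                            _mi_arena_id_none(), &memid, tld);
  if (p != NULL) {
    if (!memid.initially_committed) { /* commit the range before first use */ }
    if (!memid.initially_zero) { _mi_memzero(p, MI_ARENA_MIN_OBJ_SIZE); }  // zero only when not already zero
  }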