diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 413f76e6..25a3d93d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -61,15 +61,15 @@ int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); -void _mi_mem_collect(mi_stats_t* stats); +void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -107,7 +107,6 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -typedef int64_t mi_msecs_t; mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..8a3ffff4 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -385,6 +385,19 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) +// ------------------------------------------------------ +// Delay slots (to avoid expensive OS calls) +// 
------------------------------------------------------ +typedef int64_t mi_msecs_t; + +typedef struct mi_delay_slot_s { + mi_msecs_t expire; + uint8_t* addr; + size_t size; +} mi_delay_slot_t; + +#define MI_RESET_DELAY_SLOTS (128) + // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ @@ -395,6 +408,12 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats + mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; +} mi_os_tld_t; // Segments thread local data typedef struct mi_segments_tld_s { @@ -408,14 +427,9 @@ typedef struct mi_segments_tld_s { size_t cache_size; // total size of all segments in the cache mi_segment_t* cache; // (small) cache of segments mi_stats_t* stats; // points to tld stats + mi_os_tld_t* os; // points to os stats } mi_segments_tld_t; -// OS thread local data -typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats -} mi_os_tld_t; - // Thread local data struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count diff --git a/include/mimalloc.h b/include/mimalloc.h index c03ddc1e..e6fa9c2b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,6 +275,7 @@ typedef enum mi_option_e { mi_option_reset_decommits, mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_reset_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/heap.c b/src/heap.c index 162cf406..d03925d5 100644 --- a/src/heap.c +++ b/src/heap.c @@ -149,7 +149,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + _mi_mem_collect(&heap->tld->os); } } diff 
--git a/src/init.c b/src/init.c index ef848de4..971a93c0 100644 --- a/src/init.c +++ b/src/init.c @@ -94,11 +94,12 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) +#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -218,6 +219,7 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/memory.c b/src/memory.c index 75a1df92..e12405c1 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,6 +53,9 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +// local +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); + // Constants #if (MI_INTPTR_SIZE==8) @@ -470,16 +473,19 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
-void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; + + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + size_t arena_memid = 0; size_t idx = 0; size_t bitidx = 0; if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, stats); + _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region @@ -512,14 +518,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments { - _mi_os_reset(p, size, stats); + _mi_os_reset(p, size, tld->stats); // cannot use delay reset! (due to concurrent allocation in the same region) //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? } } } if (!is_eager_committed) { // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); + _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); } // TODO: should we free empty regions? currently only done _mi_mem_collect. @@ -539,7 +545,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- collection -----------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_stats_t* stats) { +void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. 
for (size_t i = 0; i < regions_count; i++) { mem_region_t* region = &regions[i]; @@ -554,7 +560,8 @@ void _mi_mem_collect(mi_stats_t* stats) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release mi_atomic_write(&region->info,0); @@ -564,25 +571,123 @@ void _mi_mem_collect(mi_stats_t* stats) { } } +/* ---------------------------------------------------------------------------- + Delay slots +-----------------------------------------------------------------------------*/ + +typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); + +static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, + mi_msecs_t delay, uint8_t* addr, size_t size, + mi_delay_resolve_fun* resolve, void* arg) +{ + if (delay==0) { + resolve(addr, size, arg); + return; + } + + mi_msecs_t now = _mi_clock_now(); + mi_delay_slot_t* oldest = slots; + // walk through all slots, resolving expired ones. + // remember the oldest slot to insert the new entry in. + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + + if (slot->expire == 0) { + // empty slot + oldest = slot; + } + // TODO: should we handle overlapping areas too? 
+ else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses new area, increase expiration + slot->expire = now + delay; + delay = 0; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, overwrite + slot->expire = now + delay; + slot->addr = addr; + slot->size = size; + delay = 0; + } + else if (slot->expire < now) { + // expired slot, resolve now + slot->expire = 0; + resolve(slot->addr, slot->size, arg); + } + else if (oldest->expire > slot->expire) { + oldest = slot; + } + } + if (delay>0) { + // not yet registered, use the oldest slot + if (oldest->expire > 0) { + resolve(oldest->addr, oldest->size, arg); // evict if not empty + } + oldest->expire = now + delay; + oldest->addr = addr; + oldest->size = size; + } +} + +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +{ + uint8_t* addr = (uint8_t*)p; + bool done = false; + // walk through all slots + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses the area; remove it + slot->expire = 0; + done = true; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, remove it + slot->expire = 0; + } + else if ((addr <= slot->addr && addr + size > slot->addr) || + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + // partial overlap, remove slot + mi_assert_internal(false); + slot->expire = 0; + } + } + return done; +} + +static void mi_resolve_reset(void* p, size_t size, void* vtld) { + mi_os_tld_t* tld = (mi_os_tld_t*)vtld; + _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + (uint8_t*)p, size, &mi_resolve_reset, tld); + 
return true; +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + return _mi_os_unreset(p, size, is_zero, tld->stats); + } + return true; +} + + /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_commit(p, size, is_zero, stats); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_commit(p, size, is_zero, tld->stats); } -bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_decommit(p, size, stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_reset(p, size, stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_unreset(p, size, is_zero, stats); +bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_decommit(p, size, tld->stats); } bool _mi_mem_protect(void* p, size_t size) { diff --git a/src/options.c b/src/options.c index 63b1612a..e098af0b 100644 --- a/src/options.c +++ b/src/options.c @@ -70,6 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 
256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 178e0eda..b9abe2b3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -234,7 +234,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + _mi_mem_free(segment, segment_size, segment->memid, tld->os); } @@ -281,7 +281,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->os); } segment->next = tld->cache; tld->cache = segment; @@ -346,13 +346,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); + _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); segment->mem_is_committed = true; } if (!segment->mem_is_fixed && (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); + _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); if (reset_zero) is_zero = true; } } @@ -365,7 +365,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t 
page_kind, if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->stats); + _mi_mem_commit(segment, info_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -459,7 +459,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment)); for (size_t i = 0; i < segment->capacity; i++) { @@ -472,14 +472,14 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) mi_assert_internal(!segment->mem_is_fixed); page->is_committed = true; bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,stats); + _mi_mem_commit(start,psize,&is_zero,tld->os); if (is_zero) page->is_zero_init = true; } if (page->is_reset) { mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, stats); + _mi_mem_unreset(start, psize, &is_zero, tld->os); if (is_zero) page->is_zero_init = true; } } @@ -497,21 +497,20 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); + 
_mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_mem_reset(start, psize, stats); + _mi_mem_reset(start, psize, tld->os); } // zero the page data, but not the segment fields @@ -529,7 +528,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert_expensive(mi_segment_is_valid(segment)); // mark it as free now - mi_segment_page_clear(segment, page, tld->stats); + mi_segment_page_clear(segment, page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -634,7 +633,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen _mi_stat_decrease(&tld->stats->pages_abandoned, 1); if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(segment,page,tld->stats); + mi_segment_page_clear(segment,page,tld); } else { // otherwise reclaim it @@ -666,7 +665,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen // Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld->stats); + mi_page_t* page = mi_segment_find_free(segment, tld); page->segment_in_use = true; segment->used++; mi_assert_internal(segment->used <= segment->capacity); diff --git a/src/stats.c b/src/stats.c index 011fab64..cb6d8866 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); }