diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 3be08b94..ee7f1026 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -164,6 +164,7 @@ void _mi_meta_free(void* p, size_t size, mi_memid_t memid);
 bool _mi_page_map_init(void);
 void _mi_page_map_register(mi_page_t* page);
 void _mi_page_map_unregister(mi_page_t* page);
+void _mi_page_map_unregister_range(void* start, size_t size);
 
 // "page.c"
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
@@ -437,14 +438,18 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
 
 extern uint8_t* _mi_page_map;
 
+static inline uintptr_t _mi_page_map_index(const void* p) {
+  return (((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT);
+}
+
 static inline mi_page_t* _mi_ptr_page_ex(const void* p, bool* valid) {
 #if 1
-  const uintptr_t idx = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT;
+  const uintptr_t idx = _mi_page_map_index(p);
   const size_t ofs = _mi_page_map[idx];
   if (valid != NULL) *valid = (ofs != 0);
   return (mi_page_t*)((idx - ofs + 1) << MI_ARENA_SLICE_SHIFT);
 #else
-  const uintptr_t idx = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT;
+  const uintptr_t idx = _mi_page_map_index(p);
   const uintptr_t up = idx << MI_ARENA_SLICE_SHIFT;
   __builtin_prefetch((void*)up);
   const size_t ofs = _mi_page_map[idx];
diff --git a/src/arena.c b/src/arena.c
index 03f40932..4f89a629 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -48,6 +48,7 @@ typedef struct mi_arena_s {
   mi_bitmap_t* slices_committed;               // is the slice committed? (i.e. accessible)
   mi_bitmap_t* slices_purge;                   // can the slice be purged? (slice in purge => slice in free)
   mi_bitmap_t* slices_dirty;                   // is the slice potentially non-zero?
+  mi_bitmap_t* pages;                          // all registered pages
   mi_bitmap_t* pages_abandoned[MI_BIN_COUNT];  // abandoned pages per size bin (a set bit means the start of the page)
                                                // the full queue contains abandoned full pages
   // followed by the bitmaps (whose size depends on the arena size)
@@ -117,7 +118,13 @@ static size_t mi_arena_info_slices(mi_arena_t* arena) {
   return arena->info_slices;
 }
 
-
+#if MI_DEBUG > 1
+static bool mi_arena_has_page(mi_arena_t* arena, mi_page_t* page) {
+  return (page->memid.memkind == MI_MEM_ARENA &&
+          page->memid.mem.arena.arena == arena &&
+          mi_bitmap_is_setN(arena->pages, page->memid.mem.arena.slice_index, 1));
+}
+#endif
 
 /* -----------------------------------------------------------
   Util
@@ -551,10 +558,11 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
     mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
     mi_assert_internal(mi_page_is_owned(page));
     mi_assert_internal(mi_page_is_abandoned(page));
+    mi_assert_internal(mi_arena_has_page(arena,page));
     mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
     _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
     _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1);
-
+    
     _mi_page_free_collect(page, false);  // update `used` count
     mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
     mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
@@ -588,6 +596,10 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
       slice_count <= MI_ARENA_MAX_OBJ_SLICES)  // and not too large
   {
     page = (mi_page_t*)mi_arena_try_alloc(slice_count, page_alignment, commit, allow_large, req_arena_id, tld->tseq, &memid);
+    if (page != NULL) {
+      mi_assert_internal(mi_bitmap_is_clearN(memid.mem.arena.arena->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count));
+      mi_bitmap_set(memid.mem.arena.arena->pages, memid.mem.arena.slice_index);
+    }
   }
 
   // otherwise fall back to the OS
@@ -758,6 +770,7 @@ void _mi_arena_page_free(mi_page_t* page) {
   mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
   mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
   mi_assert_internal(mi_bitmap_is_clearN(arena->pages_abandoned[bin], slice_index, 1));
+  mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
   // note: we cannot check for `!mi_page_is_abandoned_and_mapped` since that may
   // be (temporarily) not true if the free happens while trying to reclaim
   // see `mi_arana_try_claim_abandoned`
@@ -765,6 +778,9 @@ void _mi_arena_page_free(mi_page_t* page) {
 #endif
 
   _mi_page_map_unregister(page);
+  if (page->memid.memkind == MI_MEM_ARENA) {
+    mi_bitmap_clear(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index);
+  }
   mi_arena_free(page, mi_memid_size(page->memid), page->memid);
 }
 
@@ -1104,6 +1120,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->slices_committed = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_dirty = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_purge = mi_arena_bitmap_init(slice_count,&base);
+  arena->pages = mi_arena_bitmap_init(slice_count, &base);
   for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) {
     arena->pages_abandoned[i] = mi_arena_bitmap_init(slice_count,&base);
   }
@@ -1396,6 +1413,18 @@ bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool vi
 /* -----------------------------------------------------------
   Unloading and reloading an arena.
 ----------------------------------------------------------- */
+static bool mi_arena_page_register(size_t slice_index, mi_arena_t* arena, void* arg) {
+  MI_UNUSED(arg);
+  mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
+  mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
+  _mi_page_map_register(page);
+  mi_assert_internal(_mi_ptr_page(page)==page);
+  return true;
+}
+
+static bool mi_arena_pages_reregister(mi_arena_t* arena) {
+  return _mi_bitmap_forall_set(arena->pages, &mi_arena_page_register, arena, NULL);
+}
 
 mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* full_size) {
   const size_t count = mi_arena_get_count();
@@ -1416,18 +1445,23 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t*
     _mi_warning_message("can only unload managed arena's for external memory (id %zu at %p)\n", arena_id, arena);
     return false;
   }
-  if (base != NULL) { *base = (void*)arena; }
-  if (full_size != NULL) { *full_size = arena->memid.mem.os.size; }
-  if (accessed_size != NULL) {
-    // scan the commit map for the highest entry
-    size_t idx;
-    if (mi_bitmap_bsr(arena->slices_committed, &idx)) {
-      *accessed_size = (idx + 1)* MI_ARENA_SLICE_SIZE;
-    }
-    else {
-      *accessed_size = mi_arena_info_slices(arena) * MI_ARENA_SLICE_SIZE;
-    }
+
+  // find accessed size
+  size_t asize;
+  // scan the commit map for the highest entry
+  size_t idx;
+  if (mi_bitmap_bsr(arena->slices_committed, &idx)) {
+    asize = (idx + 1)* MI_ARENA_SLICE_SIZE;
   }
+  else {
+    asize = mi_arena_info_slices(arena) * MI_ARENA_SLICE_SIZE;
+  }
+  if (base != NULL) { *base = (void*)arena; }
+  if (full_size != NULL) { *full_size = arena->memid.mem.os.size; }
+  if (accessed_size != NULL) { *accessed_size = asize; }
+
+  // unregister the pages
+  _mi_page_map_unregister_range(arena, asize);
 
   // set the entry to NULL
   mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[arena_idx], NULL);
@@ -1438,7 +1472,7 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t*
   return true;
 }
 
-bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, mi_arena_id_t* arena_id) {
+mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, mi_arena_id_t* arena_id) {
   // assume the memory area is already containing the arena
   if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); }
   if (start == NULL || size == 0) return false;
@@ -1466,6 +1500,10 @@ bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large,
   arena->is_exclusive = true;
   arena->is_large = is_large;
   arena->id = _mi_arena_id_none();
-  return mi_arena_add(arena, arena_id, &_mi_stats_main);
+  if (!mi_arena_add(arena, arena_id, &_mi_stats_main)) {
+    return false;
+  }
+  mi_arena_pages_reregister(arena);
+  return true;
 }
 
diff --git a/src/bitmap.c b/src/bitmap.c
index d16a1b24..f1b1a759 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -1051,7 +1051,6 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
 #define mi_bitmap_forall_chunks(bitmap, tseq, name_chunk_idx) \
   { \
   /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
-  MI_UNUSED(tseq); \
   const size_t chunk_max_acc = 1 + mi_atomic_load_relaxed(&bitmap->chunk_max_accessed); \
   const size_t chunk_start = tseq % chunk_max_acc; /* space out threads? */ \
   const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap),MI_BFIELD_BITS); \
@@ -1197,3 +1196,25 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx) {
   mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
   mi_bchunk_clear_once_set(&bitmap->chunks[chunk_idx], cidx);
 }
+
+
+// Visit all set bits in a bitmap.
+// todo: optimize further? maybe popcount to help the branch predictor for the loop,
+// and keep b constant (using a mask)? or avx512 to directly get all indices using a mask_compressstore?
+bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) {
+  mi_bitmap_forall_chunks(bitmap, 0, chunk_idx) {
+    mi_bchunk_t* chunk = &bitmap->chunks[chunk_idx];
+    for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
+      const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS);
+      mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[j]);
+      size_t bidx;
+      while (mi_bsf(b, &bidx)) {
+        b = b & (b-1);  // clear low bit
+        const size_t idx = base_idx + bidx;
+        if (!visit(idx, arena, arg)) return false;
+      }
+    }
+  }
+  mi_bitmap_forall_chunks_end();
+  return true;
+}
diff --git a/src/bitmap.h b/src/bitmap.h
index 71a016ee..7fd09f43 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -212,4 +212,9 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx);
 
 bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx);
 
+typedef bool (mi_forall_set_fun_t)(size_t slice_index, mi_arena_t* arena, void* arg2);
+
+// Visit all set bits in a bitmap
+bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
+
 #endif // MI_BITMAP_H
diff --git a/src/os.c b/src/os.c
index 9fcd5aed..86ecb16b 100644
--- a/src/os.c
+++ b/src/os.c
@@ -676,7 +676,7 @@ int _mi_os_numa_node_get(void) {
 /* ----------------------------------------------------------------------------
   Public API
 -----------------------------------------------------------------------------*/
-
+#if 0
 mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size) {
   return mi_os_alloc_aligned(size, mi_os_mem_config.alloc_granularity, commit, NULL, full_size);
 }
@@ -716,3 +716,4 @@ mi_decl_export void mi_os_commit(void* p, size_t size) {
 mi_decl_export void mi_os_decommit(void* p, size_t size) {
   _mi_os_decommit(p, size);
 }
+#endif
diff --git a/src/page-map.c b/src/page-map.c
index 181db7f0..7b74c711 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -15,6 +15,7 @@ static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
 static void* mi_page_map_max_address = NULL;
 static mi_memid_t mi_page_map_memid;
 
+
 // (note: we need to initialize statically or otherwise C++ may run a default constructors after process initialization)
 static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0), { 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} };
 
@@ -84,7 +85,7 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t*
   *page_start = mi_page_area(page, &page_size);
   if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; }  // furthest interior pointer
   *slice_count = mi_slice_count_of_size(page_size) + (((uint8_t*)*page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE);  // add for large aligned blocks
-  return ((uintptr_t)page >> MI_ARENA_SLICE_SHIFT);
+  return _mi_page_map_index(page);
 }
 
 
@@ -113,16 +114,20 @@ void _mi_page_map_register(mi_page_t* page) {
 
 void _mi_page_map_unregister(mi_page_t* page) {
   mi_assert_internal(_mi_page_map != NULL);
-
   // get index and count
   uint8_t* page_start;
   size_t slice_count;
   const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);
-
   // unset the offsets
   _mi_memzero(_mi_page_map + idx, slice_count);
 }
 
+void _mi_page_map_unregister_range(void* start, size_t size) {
+  const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
+  const uintptr_t index = _mi_page_map_index(start);
+  mi_page_map_ensure_committed(index, slice_count);  // we commit the range in total; todo: scan the commit bits and clear only those ranges?
+  _mi_memzero(&_mi_page_map[index], slice_count);
+}
 
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
   // if mi_unlikely(_mi_page_map==NULL) {  // happens on macOS during loading
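
Usage note (not part of the patch): the hunks above thread a per-arena `pages` bitmap through page allocation, free, and reclaim so that `mi_arena_unload` can drop an arena from the page map and `mi_arena_reload` can re-register every still-live page via `mi_arena_pages_reregister`. The sketch below illustrates the intended round trip. It is only an illustration under two assumptions that this diff does not show: that `mi_arena_unload`/`mi_arena_reload` are declared in `mimalloc.h` (only their definitions appear here), and that the unloaded region stays mapped at the same address until it is reloaded. The other calls (`mi_manage_os_memory_ex`, `mi_heap_new_in_arena`, `mi_heap_malloc`, `mi_free`) are existing public mimalloc APIs.

// sketch.c -- round trip through mi_arena_unload / mi_arena_reload (illustrative only)
#include <mimalloc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  // Hand a 64 MiB block of "external" memory to mimalloc as an exclusive arena.
  // (malloc is a stand-in here; in practice this would be an mmap'd or persistent region.)
  const size_t size = 64 * 1024 * 1024;
  void* block = malloc(size);
  mi_arena_id_t arena_id;
  if (!mi_manage_os_memory_ex(block, size, true /*committed*/, false /*large*/, false /*zero*/,
                              -1 /*numa node*/, true /*exclusive*/, &arena_id)) return 1;

  // Allocate from a heap restricted to this arena; the new `pages` bitmap
  // records the page slices so they can be re-registered later.
  mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
  char* p = (char*)mi_heap_malloc(heap, 128);

  // Unload: the arena's range is cleared from the page map and the arena is
  // forgotten by mimalloc, but its memory (and live blocks like `p`) stays intact.
  void* base = NULL; size_t accessed_size = 0, full_size = 0;
  if (!mi_arena_unload(arena_id, &base, &accessed_size, &full_size)) return 1;

  // ... the region could now be handed off, as long as it remains mapped at the same address ...

  // Reload: re-adds the arena and replays the `pages` bitmap so every page is
  // registered in the page map again, making `p` usable (and freeable) once more.
  if (!mi_arena_reload(base, full_size, true /*committed*/, false /*large*/, false /*zero*/, &arena_id)) return 1;
  p[0] = 'x';
  printf("block %p still live after reload: %c\n", (void*)p, p[0]);
  mi_free(p);
  return 0;
}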