maintain pages set for arenas; improve arena load/unload

daanx 2024-12-11 22:06:25 -08:00
parent aed76f2910
commit 94ce342ea9
6 changed files with 97 additions and 22 deletions


@@ -164,6 +164,7 @@ void _mi_meta_free(void* p, size_t size, mi_memid_t memid);
 bool _mi_page_map_init(void);
 void _mi_page_map_register(mi_page_t* page);
 void _mi_page_map_unregister(mi_page_t* page);
+void _mi_page_map_unregister_range(void* start, size_t size);
 
 // "page.c"
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
@@ -437,14 +438,18 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
 extern uint8_t* _mi_page_map;
 
+static inline uintptr_t _mi_page_map_index(const void* p) {
+  return (((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT);
+}
+
 static inline mi_page_t* _mi_ptr_page_ex(const void* p, bool* valid) {
 #if 1
-  const uintptr_t idx = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT;
+  const uintptr_t idx = _mi_page_map_index(p);
   const size_t ofs = _mi_page_map[idx];
   if (valid != NULL) *valid = (ofs != 0);
   return (mi_page_t*)((idx - ofs + 1) << MI_ARENA_SLICE_SHIFT);
 #else
-  const uintptr_t idx = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT;
+  const uintptr_t idx = _mi_page_map_index(p);
   const uintptr_t up = idx << MI_ARENA_SLICE_SHIFT;
   __builtin_prefetch((void*)up);
   const size_t ofs = _mi_page_map[idx];

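The hunk above factors the page-map index calculation into `_mi_page_map_index` and keeps the one-byte offset encoding used by `_mi_ptr_page_ex`. The following standalone sketch (not part of the commit) shows how that offset encoding resolves an interior pointer back to its page start; the slice shift of 16 (64 KiB slices) and the toy map size are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>

#define SLICE_SHIFT 16                 // assumed 64 KiB slices, for illustration only

static uint8_t page_map[1 << 10];      // toy map covering 1024 slices

static uintptr_t map_index(const void* p) {
  return (((uintptr_t)p) >> SLICE_SHIFT);
}

// mirrors _mi_ptr_page_ex: entry 0 means "not registered"; otherwise the page
// header lives (ofs - 1) slices below the slice that contains p
static void* ptr_page(const void* p, int* valid) {
  const uintptr_t idx = map_index(p);
  const uint8_t ofs = page_map[idx];
  if (valid != NULL) { *valid = (ofs != 0); }
  return (void*)((idx - ofs + 1) << SLICE_SHIFT);
}

int main(void) {
  // register a page spanning slices 4..6: each slice stores its distance + 1
  for (uintptr_t i = 4; i <= 6; i++) { page_map[i] = (uint8_t)(i - 4 + 1); }
  int valid;
  void* p = (void*)(((uintptr_t)6 << SLICE_SHIFT) + 0x123);  // interior pointer in slice 6
  void* page = ptr_page(p, &valid);
  printf("valid=%d page=%p expected=%p\n", valid, page, (void*)((uintptr_t)4 << SLICE_SHIFT));
  return 0;
}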

@@ -48,6 +48,7 @@ typedef struct mi_arena_s {
   mi_bitmap_t* slices_committed;  // is the slice committed? (i.e. accessible)
   mi_bitmap_t* slices_purge;      // can the slice be purged? (slice in purge => slice in free)
   mi_bitmap_t* slices_dirty;      // is the slice potentially non-zero?
+  mi_bitmap_t* pages;             // all registered pages
   mi_bitmap_t* pages_abandoned[MI_BIN_COUNT];  // abandoned pages per size bin (a set bit means the start of the page)
   // the full queue contains abandoned full pages
   // followed by the bitmaps (whose size depends on the arena size)
@@ -117,7 +118,13 @@ static size_t mi_arena_info_slices(mi_arena_t* arena) {
   return arena->info_slices;
 }
 
+#if MI_DEBUG > 1
+static bool mi_arena_has_page(mi_arena_t* arena, mi_page_t* page) {
+  return (page->memid.memkind == MI_MEM_ARENA &&
+          page->memid.mem.arena.arena == arena &&
+          mi_bitmap_is_setN(arena->pages, page->memid.mem.arena.slice_index, 1));
+}
+#endif
 
 /* -----------------------------------------------------------
   Util
@@ -551,6 +558,7 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
     mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
     mi_assert_internal(mi_page_is_owned(page));
     mi_assert_internal(mi_page_is_abandoned(page));
+    mi_assert_internal(mi_arena_has_page(arena,page));
     mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
     _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
     _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1);
@@ -588,6 +596,10 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
       slice_count <= MI_ARENA_MAX_OBJ_SLICES)  // and not too large
   {
     page = (mi_page_t*)mi_arena_try_alloc(slice_count, page_alignment, commit, allow_large, req_arena_id, tld->tseq, &memid);
+    if (page != NULL) {
+      mi_assert_internal(mi_bitmap_is_clearN(memid.mem.arena.arena->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count));
+      mi_bitmap_set(memid.mem.arena.arena->pages, memid.mem.arena.slice_index);
+    }
   }
 
   // otherwise fall back to the OS
@@ -758,6 +770,7 @@ void _mi_arena_page_free(mi_page_t* page) {
   mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
   mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
   mi_assert_internal(mi_bitmap_is_clearN(arena->pages_abandoned[bin], slice_index, 1));
+  mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
   // note: we cannot check for `!mi_page_is_abandoned_and_mapped` since that may
   // be (temporarily) not true if the free happens while trying to reclaim
   // see `mi_arana_try_claim_abandoned`
@@ -765,6 +778,9 @@ void _mi_arena_page_free(mi_page_t* page) {
   #endif
 
   _mi_page_map_unregister(page);
+  if (page->memid.memkind == MI_MEM_ARENA) {
+    mi_bitmap_clear(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index);
+  }
   mi_arena_free(page, mi_memid_size(page->memid), page->memid);
 }
@@ -1104,6 +1120,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->slices_committed = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_dirty = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_purge = mi_arena_bitmap_init(slice_count,&base);
+  arena->pages = mi_arena_bitmap_init(slice_count, &base);
   for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) {
     arena->pages_abandoned[i] = mi_arena_bitmap_init(slice_count,&base);
   }
@@ -1396,6 +1413,18 @@ bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool vi
 /* -----------------------------------------------------------
   Unloading and reloading an arena.
 ----------------------------------------------------------- */
 
+static bool mi_arena_page_register(size_t slice_index, mi_arena_t* arena, void* arg) {
+  MI_UNUSED(arg);
+  mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
+  mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
+  _mi_page_map_register(page);
+  mi_assert_internal(_mi_ptr_page(page)==page);
+  return true;
+}
+
+static bool mi_arena_pages_reregister(mi_arena_t* arena) {
+  return _mi_bitmap_forall_set(arena->pages, &mi_arena_page_register, arena, NULL);
+}
+
 mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* full_size) {
   const size_t count = mi_arena_get_count();
@@ -1416,18 +1445,23 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t*
     _mi_warning_message("can only unload managed arena's for external memory (id %zu at %p)\n", arena_id, arena);
     return false;
   }
 
+  // find accessed size
+  size_t asize;
+  // scan the commit map for the highest entry
+  size_t idx;
+  if (mi_bitmap_bsr(arena->slices_committed, &idx)) {
+    asize = (idx + 1)* MI_ARENA_SLICE_SIZE;
+  }
+  else {
+    asize = mi_arena_info_slices(arena) * MI_ARENA_SLICE_SIZE;
+  }
   if (base != NULL) { *base = (void*)arena; }
   if (full_size != NULL) { *full_size = arena->memid.mem.os.size; }
-  if (accessed_size != NULL) {
-    // scan the commit map for the highest entry
-    size_t idx;
-    if (mi_bitmap_bsr(arena->slices_committed, &idx)) {
-      *accessed_size = (idx + 1)* MI_ARENA_SLICE_SIZE;
-    }
-    else {
-      *accessed_size = mi_arena_info_slices(arena) * MI_ARENA_SLICE_SIZE;
-    }
-  }
+  if (accessed_size != NULL) { *accessed_size = asize; }
+
+  // unregister the pages
+  _mi_page_map_unregister_range(arena, asize);
 
   // set the entry to NULL
   mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[arena_idx], NULL);
@@ -1438,7 +1472,7 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t*
   return true;
 }
 
-bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, mi_arena_id_t* arena_id) {
+mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, mi_arena_id_t* arena_id) {
   // assume the memory area is already containing the arena
   if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); }
   if (start == NULL || size == 0) return false;
@@ -1466,6 +1500,10 @@ bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large,
   arena->is_exclusive = true;
   arena->is_large = is_large;
   arena->id = _mi_arena_id_none();
-  return mi_arena_add(arena, arena_id, &_mi_stats_main);
+  if (!mi_arena_add(arena, arena_id, &_mi_stats_main)) {
+    return false;
+  }
+  mi_arena_pages_reregister(arena);
+  return true;
 }

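For context, a hypothetical caller-side sketch of the unload/reload flow these arena changes support (not part of the commit). It assumes the exported prototypes of mi_arena_unload and mi_arena_reload are visible to the caller, e.g. through the library header on this branch, and that arena_id refers to an exclusive arena created earlier (for instance via mi_manage_os_memory_ex); error handling is minimal.

#include <mimalloc.h>   // assumed to expose mi_arena_unload / mi_arena_reload on this branch
#include <stdbool.h>
#include <stdio.h>

static void unload_reload_example(mi_arena_id_t arena_id) {
  void*  base = NULL;
  size_t accessed_size = 0;
  size_t full_size = 0;

  // detach the arena; the commit unregisters its whole accessed range from the page map
  if (!mi_arena_unload(arena_id, &base, &accessed_size, &full_size)) {
    fprintf(stderr, "arena unload failed\n");
    return;
  }

  // ... the memory at [base, base + accessed_size) could now be persisted or transferred ...

  // re-attach the same memory; the new per-arena `pages` bitmap lets the
  // allocator re-register every live page in the page map
  mi_arena_id_t new_id;
  if (!mi_arena_reload(base, full_size, /*is_committed*/ true,
                       /*is_large*/ false, /*is_zero*/ false, &new_id)) {
    fprintf(stderr, "arena reload failed\n");
  }
}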

@@ -1051,7 +1051,6 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
 #define mi_bitmap_forall_chunks(bitmap, tseq, name_chunk_idx) \
   { \
   /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
-  MI_UNUSED(tseq); \
   const size_t chunk_max_acc = 1 + mi_atomic_load_relaxed(&bitmap->chunk_max_accessed); \
   const size_t chunk_start = tseq % chunk_max_acc; /* space out threads? */ \
   const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap),MI_BFIELD_BITS); \
@@ -1197,3 +1196,25 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx) {
   mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
   mi_bchunk_clear_once_set(&bitmap->chunks[chunk_idx], cidx);
 }
+
+// Visit all set bits in a bitmap.
+// todo: optimize further? maybe popcount to help the branch predictor for the loop,
+// and keep b constant (using a mask)? or avx512 to directly get all indices using a mask_compressstore?
+bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) {
+  mi_bitmap_forall_chunks(bitmap, 0, chunk_idx) {
+    mi_bchunk_t* chunk = &bitmap->chunks[chunk_idx];
+    for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
+      const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS);
+      mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[j]);
+      size_t bidx;
+      while (mi_bsf(b, &bidx)) {
+        b = b & (b-1);  // clear low bit
+        const size_t idx = base_idx + bidx;
+        if (!visit(idx, arena, arg)) return false;
+      }
+    }
+  }
+  mi_bitmap_forall_chunks_end();
+  return true;
+}

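The `_mi_bitmap_forall_set` loop above relies on the classic bit-scan idiom: find the lowest set bit, visit it, then clear it with `b & (b - 1)`. A minimal standalone sketch of that idiom (not part of the commit), with portable stand-ins for `mi_bfield_t` and `mi_bsf`:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t bfield_t;   // stand-in for mi_bfield_t

// stand-in for mi_bsf: false when b == 0, otherwise yields the index of the lowest set bit
static bool bsf(bfield_t b, size_t* idx) {
  if (b == 0) return false;
  *idx = (size_t)__builtin_ctzll(b);  // gcc/clang builtin; use _BitScanForward64 on MSVC
  return true;
}

int main(void) {
  bfield_t b = 0x90000005;   // bits 0, 2, 28, 31 set
  size_t bidx;
  while (bsf(b, &bidx)) {
    b = b & (b - 1);         // clear the lowest set bit
    printf("visit bit %zu\n", bidx);
  }
  return 0;
}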

@@ -212,4 +212,9 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx);
 bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx);
 
+typedef bool (mi_forall_set_fun_t)(size_t slice_index, mi_arena_t* arena, void* arg2);
+
+// Visit all set bits in a bitmap
+bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
+
 #endif // MI_BITMAP_H


@@ -676,7 +676,7 @@ int _mi_os_numa_node_get(void) {
 /* ----------------------------------------------------------------------------
   Public API
 -----------------------------------------------------------------------------*/
+#if 0
 mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size) {
   return mi_os_alloc_aligned(size, mi_os_mem_config.alloc_granularity, commit, NULL, full_size);
 }
@@ -716,3 +716,4 @@ mi_decl_export void mi_os_commit(void* p, size_t size) {
 mi_decl_export void mi_os_decommit(void* p, size_t size) {
   _mi_os_decommit(p, size);
 }
+#endif


@@ -15,6 +15,7 @@ static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
 static void* mi_page_map_max_address = NULL;
 static mi_memid_t mi_page_map_memid;
 
 // (note: we need to initialize statically or otherwise C++ may run a default constructors after process initialization)
 static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0),
                                           { 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} };
@@ -84,7 +85,7 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t*
   *page_start = mi_page_area(page, &page_size);
   if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; }  // furthest interior pointer
   *slice_count = mi_slice_count_of_size(page_size) + (((uint8_t*)*page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE);  // add for large aligned blocks
-  return ((uintptr_t)page >> MI_ARENA_SLICE_SHIFT);
+  return _mi_page_map_index(page);
 }
@@ -113,16 +114,20 @@ void _mi_page_map_register(mi_page_t* page) {
 void _mi_page_map_unregister(mi_page_t* page) {
   mi_assert_internal(_mi_page_map != NULL);
 
   // get index and count
   uint8_t* page_start;
   size_t slice_count;
   const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);
 
   // unset the offsets
   _mi_memzero(_mi_page_map + idx, slice_count);
 }
 
+void _mi_page_map_unregister_range(void* start, size_t size) {
+  const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
+  const uintptr_t index = _mi_page_map_index(start);
+  mi_page_map_ensure_committed(index, slice_count);  // we commit the range in total; todo: scan the commit bits and clear only those ranges?
+  _mi_memzero(&_mi_page_map[index], slice_count);
+}
+
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
   // if mi_unlikely(_mi_page_map==NULL) {  // happens on macOS during loading