add initial support for visiting abandoned segments per subprocess, upstream for python/cpython#114133

daanx 2024-06-02 07:47:08 -07:00
parent f93fb900b7
commit 8f874555d5
8 changed files with 206 additions and 88 deletions

View file

@@ -262,7 +262,7 @@ typedef struct mi_heap_area_s {
 typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
-mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
+mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
 // Experimental
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;

@@ -292,9 +292,13 @@ mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t a
 // Experimental: allow sub-processes whose memory segments stay separated (and no reclamation between them)
 // Used for example for separate interpreter's in one process.
 typedef void* mi_subproc_id_t;
+mi_decl_export mi_subproc_id_t mi_subproc_main(void);
 mi_decl_export mi_subproc_id_t mi_subproc_new(void);
 mi_decl_export void mi_subproc_delete(mi_subproc_id_t subproc);
 mi_decl_export void mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet)
+
+// Experimental: visit abandoned heap areas (from threads that have been terminated)
+mi_decl_export bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
 // deprecated
 mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;

@@ -355,6 +359,7 @@ typedef enum mi_option_e {
   mi_option_abandoned_reclaim_on_free,  // allow to reclaim an abandoned segment on a free (=1)
   mi_option_disallow_arena_alloc,       // 1 = do not use arena's for allocation (except if using specific arena id's)
   mi_option_retry_on_oom,               // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows)
+  mi_option_visit_abandoned,            // allow visiting heap blocks from abandoned threads (=0)
   _mi_option_last,
   // legacy option names
   mi_option_large_os_pages = mi_option_allow_large_os_pages,

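A minimal caller-side sketch of the experimental API above (not part of the commit; `count_blocks`, `thread_run`, and `inspect` are illustrative names). A thread attaches itself to a sub-process before its first allocation; after it terminates, another thread can enumerate the blocks it left behind, provided the `visit_abandoned` option was enabled at startup:

#include <mimalloc.h>
#include <stdio.h>

// visitor: invoked once per heap area with `block == NULL`, then once per live block
static bool count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
                         void* block, size_t block_size, void* arg) {
  (void)heap; (void)area; (void)block_size;
  if (block != NULL) { (*(size_t*)arg)++; }
  return true;  // returning false stops the visit
}

// thread entry: attach to the sub-process before any allocation happens
static void thread_run(mi_subproc_id_t subproc) {
  mi_subproc_add_current_thread(subproc);
  // ... allocate, work, and exit without freeing everything ...
}

// called later, e.g. after the thread above has terminated
static void inspect(mi_subproc_id_t subproc) {
  size_t abandoned = 0;
  mi_abandoned_visit_blocks(subproc, -1 /* any heap tag */, true /* visit blocks */,
                            &count_blocks, &abandoned);
  printf("abandoned blocks: %zu\n", abandoned);
}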
View file

@@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define WIN32_LEAN_AND_MEAN
 #endif
 #include <windows.h>
-#elif !defined(_WIN32) && (defined(__EMSCRIPTEN_SHARED_MEMORY__) || !defined(__wasi__))
+#elif !defined(__wasi__) && (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__))
 #define MI_USE_PTHREADS
 #include <pthread.h>
 #endif

@@ -35,9 +35,9 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_atomic(name)        std::atomic_##name
 #define mi_memory_order(name)  std::memory_order_##name
 #if (__cplusplus >= 202002L)   // c++20, see issue #571
 #define MI_ATOMIC_VAR_INIT(x)  x
 #elif !defined(ATOMIC_VAR_INIT)
 #define MI_ATOMIC_VAR_INIT(x)  x
 #else
 #define MI_ATOMIC_VAR_INIT(x)  ATOMIC_VAR_INIT(x)
 #endif

@@ -337,6 +337,7 @@ typedef _Atomic(uintptr_t) mi_atomic_guard_t;
 // ----------------------------------------------------------------------
 // Yield
 // ----------------------------------------------------------------------
 #if defined(__cplusplus)
 #include <thread>
 static inline void mi_atomic_yield(void) {

@@ -401,59 +402,73 @@ static inline void mi_atomic_yield(void) {
 // ----------------------------------------------------------------------
-// Locks are only used for abandoned segment visiting
+// Locks are only used for abandoned segment visiting in `arena.c`
 // ----------------------------------------------------------------------

 #if defined(_WIN32)
 #define mi_lock_t  CRITICAL_SECTION
-static inline bool _mi_prim_lock(mi_lock_t* lock) {
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
+  return TryEnterCriticalSection(lock);
+}
+static inline bool mi_lock_acquire(mi_lock_t* lock) {
   EnterCriticalSection(lock);
   return true;
 }
-static inline bool _mi_prim_try_lock(mi_lock_t* lock) {
-  return TryEnterCriticalSection(lock);
-}
-static inline void _mi_prim_unlock(mi_lock_t* lock) {
+static inline void mi_lock_release(mi_lock_t* lock) {
   LeaveCriticalSection(lock);
 }
+static inline void mi_lock_init(mi_lock_t* lock) {
+  InitializeCriticalSection(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  DeleteCriticalSection(lock);
+}

 #elif defined(MI_USE_PTHREADS)
 #define mi_lock_t  pthread_mutex_t
-static inline bool _mi_prim_lock(mi_lock_t* lock) {
-  return (pthread_mutex_lock(lock) == 0);
-}
-static inline bool _mi_prim_try_lock(mi_lock_t* lock) {
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
   return (pthread_mutex_trylock(lock) == 0);
 }
-static inline void _mi_prim_unlock(mi_lock_t* lock) {
+static inline bool mi_lock_acquire(mi_lock_t* lock) {
+  return (pthread_mutex_lock(lock) == 0);
+}
+static inline void mi_lock_release(mi_lock_t* lock) {
   pthread_mutex_unlock(lock);
 }
+static inline void mi_lock_init(mi_lock_t* lock) {
+  (void)(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  (void)(lock);
+}

 #elif defined(__cplusplus)
 #include <mutex>
 #define mi_lock_t  std::mutex
-static inline bool _mi_prim_lock(mi_lock_t* lock) {
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
+  return lock->try_lock();
+}
+static inline bool mi_lock_acquire(mi_lock_t* lock) {
   lock->lock();
   return true;
 }
-static inline bool _mi_prim_try_lock(mi_lock_t* lock) {
-  return lock->try_lock();
-}
-static inline void _mi_prim_unlock(mi_lock_t* lock) {
+static inline void mi_lock_release(mi_lock_t* lock) {
   lock->unlock();
 }
+static inline void mi_lock_init(mi_lock_t* lock) {
+  (void)(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  (void)(lock);
+}

 #else

@@ -462,22 +477,26 @@ static inline void _mi_prim_unlock(mi_lock_t* lock) {
 #define mi_lock_t  _Atomic(uintptr_t)
-static inline bool _mi_prim_try_lock(mi_lock_t* lock) {
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
   uintptr_t expected = 0;
   return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1);
 }
-static inline bool _mi_prim_lock(mi_lock_t* lock) {
+static inline bool mi_lock_acquire(mi_lock_t* lock) {
   for (int i = 0; i < 1000; i++) {  // for at most 1000 tries
-    if (_mi_prim_try_lock(lock)) return true;
+    if (mi_lock_try_acquire(lock)) return true;
     mi_atomic_yield();
   }
   return true;
 }
-static inline void _mi_prim_unlock(mi_lock_t* lock) {
+static inline void mi_lock_release(mi_lock_t* lock) {
   mi_atomic_store_release(lock, (uintptr_t)0);
 }
+static inline void mi_lock_init(mi_lock_t* lock) {
+  mi_lock_release(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  (void)(lock);
+}
 #endif

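The renamed helpers above form a small portable lock abstraction (critical section on Windows, pthread mutex, std::mutex, or a spinning atomic word). A sketch of the intended call pattern, illustrative only, since the sole in-tree user is the abandoned-segment visiting in `arena.c`:

static mi_lock_t visit_lock;   // expands to the platform's lock type; zero/default initialized

static void lock_pattern_sketch(void) {
  mi_lock_init(&visit_lock);
  if (mi_lock_try_acquire(&visit_lock)) {   // non-blocking; false when contended
    // ... short critical section ...
    mi_lock_release(&visit_lock);
  }
  if (mi_lock_acquire(&visit_lock)) {       // blocking; used when every segment must be visited
    // ... critical section ...
    mi_lock_release(&visit_lock);
  }
  mi_lock_done(&visit_lock);
}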
View file

@@ -79,11 +79,12 @@ extern mi_decl_cache_align const mi_page_t _mi_page_empty;
 bool          _mi_is_main_thread(void);
 size_t        _mi_current_thread_count(void);
 bool          _mi_preloading(void);           // true while the C runtime is not initialized yet
-mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
-mi_heap_t*    _mi_heap_main_get(void);        // statically allocated main backing heap
 void          _mi_thread_done(mi_heap_t* heap);
 void          _mi_thread_data_collect(void);
 void          _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
+mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
+mi_heap_t*    _mi_heap_main_get(void);        // statically allocated main backing heap
+mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);

 // os.c
 void          _mi_os_init(void);              // called from process init

@@ -136,7 +137,7 @@ typedef struct mi_arena_field_cursor_s { // abstract struct
   mi_subproc_t* subproc;
 } mi_arena_field_cursor_t;
 void          _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current);
-mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
+mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous, bool visit_all);

 // "segment-map.c"
 void          _mi_segment_map_allocated_at(const mi_segment_t* segment);

@@ -158,6 +159,7 @@ void _mi_segments_collect(bool force, mi_segments_tld_t* tld);
 void          _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
 void          _mi_abandoned_await_readers(void);
 bool          _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);
+bool          _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);

 // "page.c"
 void*         _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;

@@ -189,6 +191,8 @@ void _mi_heap_set_default_direct(mi_heap_t* heap);
 bool          _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
 void          _mi_heap_unsafe_destroy_all(void);
 mi_heap_t*    _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag);
+void          _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page);
+bool          _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg);

 // "stats.c"
 void          _mi_stats_done(mi_stats_t* stats);

View file

@@ -40,23 +40,24 @@ typedef uintptr_t mi_block_info_t;
 // A memory arena descriptor
 typedef struct mi_arena_s {
   mi_arena_id_t id;                      // arena id; 0 for non-specific
   mi_memid_t memid;                      // memid of the memory area
   _Atomic(uint8_t*) start;               // the start of the memory area
   size_t   block_count;                  // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
   size_t   field_count;                  // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
   size_t   meta_size;                    // size of the arena structure itself (including its bitmaps)
   mi_memid_t meta_memid;                 // memid of the arena structure itself (OS or static allocation)
   int      numa_node;                    // associated NUMA node
   bool     exclusive;                    // only allow allocations if specifically for this arena
   bool     is_large;                     // memory area consists of large- or huge OS pages (always committed)
+  mi_lock_t abandoned_visit_lock;        // lock is only used when abandoned segments are being visited
   _Atomic(size_t) search_idx;            // optimization to start the search for free blocks
   _Atomic(mi_msecs_t) purge_expire;      // expiration time when blocks should be decommitted from `blocks_decommit`.
   mi_bitmap_field_t* blocks_dirty;       // are the blocks potentially non-zero?
   mi_bitmap_field_t* blocks_committed;   // are the blocks committed? (can be NULL for memory that cannot be decommitted)
   mi_bitmap_field_t* blocks_purge;       // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
   mi_bitmap_field_t* blocks_abandoned;   // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
   mi_bitmap_field_t blocks_inuse[1];     // in-place bitmap of in-use blocks (of size `field_count`)
   // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields.
 } mi_arena_t;

@@ -65,7 +66,6 @@ typedef struct mi_arena_s {
 static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
 static mi_decl_cache_align _Atomic(size_t)      mi_arena_count; // = 0
 //static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept;

 /* -----------------------------------------------------------

@@ -702,6 +702,7 @@ static void mi_arenas_unsafe_destroy(void) {
   for (size_t i = 0; i < max_arena; i++) {
     mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
     if (arena != NULL) {
+      mi_lock_done(&arena->abandoned_visit_lock);
       if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
         mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
         _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);

@@ -813,9 +814,9 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
 // start a cursor at a randomized arena
 void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current) {
-  mi_assert_internal(heap->tld->segments.subproc == subproc);
+  mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc);
   const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
-  current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
+  current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
   current->count = 0;
   current->bitmap_idx = 0;
   current->subproc = subproc;

@@ -823,7 +824,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_aren
 // reclaim abandoned segments
 // this does not set the thread id (so it appears as still abandoned)
-mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous )
+mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous, bool visit_all )
 {
   const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count);
   if (max_arena <= 0 || mi_atomic_load_relaxed(&previous->subproc->abandoned_count) == 0) return NULL;

@@ -831,18 +832,31 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
   int count = previous->count;
   size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
   size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1;
-  // visit arena's (from previous)
+  // visit arena's (from the previous cursor)
   for (; count < max_arena; count++, field_idx = 0, bit_idx = 0) {
     mi_arena_id_t arena_idx = previous->start + count;
     if (arena_idx >= max_arena) { arena_idx = arena_idx % max_arena; } // wrap around
     mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
     if (arena != NULL) {
+      bool has_lock = false;
       // visit the abandoned fields (starting at previous_idx)
       for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
         size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
         if mi_unlikely(field != 0) { // skip zero fields quickly
+          // we only take the arena lock if there are actually abandoned segments present
+          if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) {
+            has_lock = (visit_all ? mi_lock_acquire(&arena->abandoned_visit_lock) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
+            if (!has_lock) {
+              if (visit_all) {
+                _mi_error_message(EINVAL, "failed to visit all abandoned segments due to failure to acquire the visitor lock");
+              }
+              // skip to next arena
+              break;
+            }
+          }
+          mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned));
           // visit each set bit in the field (todo: maybe use `ctz` here?)
           for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
            // pre-check if the bit is set
            size_t mask = ((size_t)1 << bit_idx);
            if mi_unlikely((field & mask) == mask) {

@@ -852,7 +866,10 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
               mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
               mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
               mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
-              // check that belongs to our sub-process
+              // check that the segment belongs to our sub-process
+              // note: this is the reason we need a lock in the case abandoned visiting is enabled.
+              // without the lock an abandoned visit may otherwise fail to visit all segments.
+              // for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the arena lock.
               if (segment->subproc != previous->subproc) {
                 // it is from another subprocess, re-mark it and continue searching
                 const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);

@@ -865,6 +882,7 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
                 previous->count = count;
                 //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
+                if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
                 return segment;
               }
             }

@@ -872,6 +890,7 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
           }
        }
      }
+      if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
    }
  }
  // no more found

@@ -881,6 +900,29 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
 }

+static bool mi_arena_visit_abandoned_blocks(mi_subproc_t* subproc, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
+  mi_arena_field_cursor_t current;
+  _mi_arena_field_cursor_init(NULL, subproc, &current);
+  mi_segment_t* segment;
+  while ((segment = _mi_arena_segment_clear_abandoned_next(&current, true /* visit all */)) != NULL) {
+    if (!_mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg)) return false;
+  }
+  return true;
+}
+
+bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
+  // (unfortunately) the visit_abandoned option must be enabled from the start.
+  // This is to avoid taking locks if abandoned list visiting is not required (as for most programs)
+  if (!mi_option_is_enabled(mi_option_visit_abandoned)) {
+    mi_assert(false);
+    _mi_error_message(EINVAL, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON");
+    return false;
+  }
+  // visit abandoned segments in the arena's
+  return mi_arena_visit_abandoned_blocks(_mi_subproc_from_id(subproc_id), heap_tag, visit_blocks, visitor, arg);
+}

 /* -----------------------------------------------------------
   Add an arena.
 ----------------------------------------------------------- */

@@ -934,6 +976,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->is_large = is_large;
   arena->purge_expire = 0;
   arena->search_idx = 0;
+  mi_lock_init(&arena->abandoned_visit_lock);
   // consecutive bitmaps
   arena->blocks_dirty     = &arena->blocks_inuse[fields];     // just after inuse bitmap
   arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap

View file

@@ -137,6 +137,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   {
     // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
     // if all memory is freed by now, all segments should be freed.
+    // note: this only collects in the current subprocess
     _mi_abandoned_reclaim_all(heap, &heap->tld->segments);
   }

@@ -515,17 +516,21 @@ bool mi_check_owned(const void* p) {
   enable visiting all blocks of all heaps across threads
 ----------------------------------------------------------- */

-// Separate struct to keep `mi_page_t` out of the public interface
-typedef struct mi_heap_area_ex_s {
-  mi_heap_area_t area;
-  mi_page_t*     page;
-} mi_heap_area_ex_t;
+void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
+  const size_t bsize  = mi_page_block_size(page);
+  const size_t ubsize = mi_page_usable_block_size(page);
+  area->reserved  = page->reserved * bsize;
+  area->committed = page->capacity * bsize;
+  area->blocks    = mi_page_start(page);
+  area->used      = page->used;   // number of blocks in use (#553)
+  area->block_size      = ubsize;
+  area->full_block_size = bsize;
+}

-static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) {
-  mi_assert(xarea != NULL);
-  if (xarea==NULL) return true;
-  const mi_heap_area_t* area = &xarea->area;
-  mi_page_t* page = xarea->page;
+bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) {
+  mi_assert(area != NULL);
+  if (area==NULL) return true;
   mi_assert(page != NULL);
   if (page == NULL) return true;

@@ -590,23 +595,23 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
   return true;
 }

-typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
+// Separate struct to keep `mi_page_t` out of the public interface
+typedef struct mi_heap_area_ex_s {
+  mi_heap_area_t area;
+  mi_page_t*     page;
+} mi_heap_area_ex_t;
+
+typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);

 static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
   MI_UNUSED(heap);
   MI_UNUSED(pq);
   mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
   mi_heap_area_ex_t xarea;
-  const size_t bsize = mi_page_block_size(page);
-  const size_t ubsize = mi_page_usable_block_size(page);
   xarea.page = page;
-  xarea.area.reserved = page->reserved * bsize;
-  xarea.area.committed = page->capacity * bsize;
-  xarea.area.blocks = mi_page_start(page);
-  xarea.area.used = page->used; // number of blocks in use (#553)
-  xarea.area.block_size = ubsize;
-  xarea.area.full_block_size = bsize;
+  _mi_heap_area_init(&xarea.area, page);
   return fun(heap, &xarea, arg);
 }

@@ -627,7 +632,7 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t*
   mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg;
   if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false;
   if (args->visit_blocks) {
-    return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg);
+    return _mi_heap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg);
   }
   else {
     return true;

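For reference, a visitor sketch that interprets the `mi_heap_area_t` fields filled in by `_mi_heap_area_init` above (user-side code, not part of the commit). The visiting code calls the visitor once per area with `block == NULL`, and then once per live block when `visit_blocks` is true; the `heap` argument may be NULL when the area comes from an abandoned segment:

#include <mimalloc.h>

typedef struct visit_stats_s {
  size_t areas;        // heap areas (pages) seen
  size_t blocks;       // live blocks seen
  size_t used_bytes;   // sum of usable block sizes
} visit_stats_t;

static bool visit_stats_fun(const mi_heap_t* heap, const mi_heap_area_t* area,
                            void* block, size_t block_size, void* arg) {
  visit_stats_t* stats = (visit_stats_t*)arg;
  (void)heap;                 // may be NULL for abandoned areas
  if (block == NULL) {        // the per-area call
    stats->areas++;
    // area->used is the number of blocks in use; area->block_size is the usable
    // block size, while area->full_block_size is the full size each block occupies
  }
  else {                      // a per-block call
    stats->blocks++;
    stats->used_bytes += block_size;
  }
  return true;                // keep visiting
}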
View file

@@ -185,22 +185,30 @@ mi_heap_t* _mi_heap_main_get(void) {
   Sub process
 ----------------------------------------------------------- */

+static mi_decl_cache_align _Atomic(uintptr_t) mi_subproc_count;
+
+mi_subproc_id_t mi_subproc_main(void) {
+  return NULL;
+}
+
 mi_subproc_id_t mi_subproc_new(void) {
   mi_memid_t memid = _mi_memid_none();
   mi_subproc_t* subproc = (mi_subproc_t*)_mi_arena_meta_zalloc(sizeof(mi_subproc_t), &memid);
   if (subproc == NULL) return NULL;
+  mi_atomic_increment_relaxed(&mi_subproc_count);
   subproc->memid = memid;
   return subproc;
 }

-static mi_subproc_t* mi_subproc_from_id(mi_subproc_id_t subproc_id) {
+mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) {
   return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id);
 }

 void mi_subproc_delete(mi_subproc_id_t subproc_id) {
   if (subproc_id == NULL) return;
-  mi_subproc_t* subproc = mi_subproc_from_id(subproc_id);
+  mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
   _mi_arena_meta_free(subproc, subproc->memid, sizeof(mi_subproc_t));
+  mi_atomic_decrement_relaxed(&mi_subproc_count);
 }

@@ -208,7 +216,7 @@ void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
   if (heap == NULL) return;
   mi_assert(heap->tld->segments.subproc == &mi_subproc_default);
   if (heap->tld->segments.subproc != &mi_subproc_default) return;
-  heap->tld->segments.subproc = mi_subproc_from_id(subproc_id);
+  heap->tld->segments.subproc = _mi_subproc_from_id(subproc_id);
 }

View file

@ -94,6 +94,11 @@ static mi_option_desc_t options[_mi_option_last] =
{ 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free { 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
{ 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's) { 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
{ 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. { 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
#if defined(MI_VISIT_ABANDONED)
{ 1, INITIALIZED, MI_OPTION(visit_abandoned) }, // allow visiting heap blocks in abandonded segments; requires taking locks during reclaim.
#else
{ 0, UNINIT, MI_OPTION(visit_abandoned) },
#endif
}; };
static void mi_option_init(mi_option_desc_t* desc); static void mi_option_init(mi_option_desc_t* desc);

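The option is read on the reclaim path before any lock is taken, so it has to be enabled from the start; turning it on after threads have already begun abandoning segments would leave earlier reclaims unsynchronized with visitors. Three ways to do that, assuming mimalloc's usual option handling: build with `MI_VISIT_ABANDONED` defined so the entry above defaults to 1, set `MIMALLOC_VISIT_ABANDONED=ON` in the environment, or enable it programmatically at startup, for example:

#include <mimalloc.h>

int main(void) {
  // must run before any thread abandons segments (and before any visiting)
  mi_option_enable(mi_option_visit_abandoned);
  // ... create sub-processes and threads; later call mi_abandoned_visit_blocks ...
  return 0;
}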
View file

@@ -962,7 +962,7 @@ bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
 void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
   mi_segment_t* segment;
   mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
-  while ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL) {
+  while ((segment = _mi_arena_segment_clear_abandoned_next(&current, true /* blocking */)) != NULL) {
     mi_segment_reclaim(segment, heap, 0, NULL, tld);
   }
 }

@@ -987,7 +987,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
   mi_segment_t* segment;
   mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
-  while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
+  while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current, false /* non-blocking */)) != NULL))
   {
     mi_assert(segment->subproc == heap->tld->segments.subproc); // cursor only visits segments in our sub-process
     segment->abandoned_visits++;

@@ -1240,3 +1240,32 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
   mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc);
   return page;
 }
+
+/* -----------------------------------------------------------
+   Visit blocks in a segment (only used for abandoned segments)
+----------------------------------------------------------- */
+
+static bool mi_segment_visit_page(mi_page_t* page, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
+  mi_heap_area_t area;
+  _mi_heap_area_init(&area, page);
+  if (!visitor(NULL, &area, NULL, area.block_size, arg)) return false;
+  if (visit_blocks) {
+    return _mi_heap_area_visit_blocks(&area, page, visitor, arg);
+  }
+  else {
+    return true;
+  }
+}
+
+bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
+  for (size_t i = 0; i < segment->capacity; i++) {
+    mi_page_t* const page = &segment->pages[i];
+    if (page->segment_in_use) {
+      if (heap_tag < 0 || (int)page->heap_tag == heap_tag) {
+        if (!mi_segment_visit_page(page, visit_blocks, visitor, arg)) return false;
+      }
+    }
+  }
+  return true;
+}
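
As the `heap_tag` check above shows, a negative tag visits pages of every heap, while a specific tag restricts the visit to heaps created with that tag (this is how an embedder such as CPython can pick out just one of its heaps). A caller-side sketch; `MY_HEAP_TAG` is an assumed application constant, not something defined by this commit:

#include <mimalloc.h>
#include <stdio.h>

#define MY_HEAP_TAG  1   // assumption: the tag the application gave its heap

static bool dump_block(const mi_heap_t* heap, const mi_heap_area_t* area,
                       void* block, size_t block_size, void* arg) {
  (void)heap; (void)area; (void)arg;
  if (block != NULL) { printf("abandoned block %p (%zu bytes)\n", block, block_size); }
  return true;
}

static void dump_abandoned(mi_subproc_id_t subproc) {
  mi_abandoned_visit_blocks(subproc, MY_HEAP_TAG, true, &dump_block, NULL);
  // or pass -1 as the heap tag to visit blocks from heaps of any tag
}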