add support for sub-processes (to support python/cpython#113717)

daanx 2024-06-01 15:57:18 -07:00
parent f87a4c15b2
commit d9aa19a763
7 changed files with 119 additions and 44 deletions

View file

@@ -288,8 +288,16 @@ mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_co
 mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
 #endif
+// Experimental: allow sub-processes whose memory segments stay separated (and no reclamation between them)
+// Used for example for separate interpreter's in one process.
+typedef void* mi_subproc_id_t;
+mi_decl_export mi_subproc_id_t mi_subproc_new(void);
+mi_decl_export void mi_subproc_delete(mi_subproc_id_t subproc);
+mi_decl_export void mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet)
 // deprecated
 mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
 // ------------------------------------------------------
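A minimal usage sketch of this new public API (not part of the commit; it assumes pthreads, and the variable and function names are illustrative): create the sub-process first, have each thread that should live in it call mi_subproc_add_current_thread before its first allocation, and delete the id when it is no longer needed.

    #include <mimalloc.h>
    #include <pthread.h>

    static mi_subproc_id_t subproc;          // hypothetical example name

    static void* worker(void* arg) {
      (void)arg;
      // must run right after thread creation, before any mimalloc allocation on this thread
      mi_subproc_add_current_thread(subproc);
      void* p = mi_malloc(64);
      mi_free(p);
      return NULL;
    }

    int main(void) {
      subproc = mi_subproc_new();
      pthread_t t;
      pthread_create(&t, NULL, &worker, NULL);
      pthread_join(t, NULL);
      mi_subproc_delete(subproc);            // free the sub-process id once it is no longer needed
      return 0;
    }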

View file

@@ -130,14 +130,17 @@ void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);
 bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment);
 void _mi_arena_segment_mark_abandoned(mi_segment_t* segment);
-size_t _mi_arena_segment_abandoned_count(void);
-typedef struct mi_arena_field_cursor_s { // abstract
+void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid);
+void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size);
+typedef struct mi_arena_field_cursor_s { // abstract struct
   mi_arena_id_t start;
   int count;
   size_t bitmap_idx;
+  mi_subproc_t* subproc;
 } mi_arena_field_cursor_t;
-void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current);
+void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current);
 mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
 // "segment-map.c"

View file

@@ -307,7 +307,7 @@ typedef struct mi_page_s {
   mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
   uint16_t used; // number of blocks in use (including blocks in `thread_free`)
   uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
-  uint8_t heap_tag; // tag of the owning heap, used for separated heaps by object type
+  uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type
   // padding
   size_t block_size; // size available in each block (always `>0`)
   uint8_t* page_start; // start of the page area containing the blocks

@@ -387,6 +387,7 @@ typedef struct mi_memid_s {
 // ---------------------------------------------------------------
 // Segments contain mimalloc pages
 // ---------------------------------------------------------------
+typedef struct mi_subproc_s mi_subproc_t;
 // Segments are large allocated memory blocks (2MiB on 64 bit) from the OS.
 // Inside segments we allocated fixed size _pages_ that contain blocks.

@@ -409,6 +410,7 @@ typedef struct mi_segment_s {
   size_t capacity; // count of available pages (`#free + used`)
   size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages.
   uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
+  mi_subproc_t* subproc; // segment belongs to sub process
   // layout like this to optimize access in `mi_free`
   _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment

@@ -600,10 +602,23 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
 #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
+// ------------------------------------------------------
+// Sub processes do not reclaim or visit segments
+// from other sub processes
+// ------------------------------------------------------
+struct mi_subproc_s {
+  _Atomic(size_t) abandoned_count; // count of abandoned segments for this sup-process
+  mi_memid_t memid; // provenance
+};
+mi_subproc_t* mi_subproc_from_id(mi_subproc_id_t subproc_id);
 // ------------------------------------------------------
 // Thread Local data
 // ------------------------------------------------------
+// Milliseconds as in `int64_t` to avoid overflows
 typedef int64_t mi_msecs_t;
 // Queue of segments

@@ -628,8 +643,9 @@ typedef struct mi_segments_tld_s {
   size_t current_size; // current size of all segments
   size_t peak_size; // peak size of all segments
   size_t reclaim_count;// number of reclaimed (abandoned) segments
+  mi_subproc_t* subproc; // sub-process this thread belongs to.
   mi_stats_t* stats; // points to tld stats
-  mi_os_tld_t* os; // points to os stats
+  mi_os_tld_t* os; // points to os tld
 } mi_segments_tld_t;
 // Thread local data

View file

@@ -172,7 +172,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m
   return p;
 }
-static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
+void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid) {
   *memid = _mi_memid_none();
   // try static

@@ -180,7 +180,7 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st
   if (p != NULL) return p;
   // or fall back to the OS
-  p = _mi_os_alloc(size, memid, stats);
+  p = _mi_os_alloc(size, memid, &_mi_stats_main);
   if (p == NULL) return NULL;
   // zero the OS memory if needed

@@ -191,9 +191,9 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st
   return p;
 }
-static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) {
+void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) {
   if (mi_memkind_is_os(memid.memkind)) {
-    _mi_os_free(p, size, memid, stats);
+    _mi_os_free(p, size, memid, &_mi_stats_main);
   }
   else {
     mi_assert(memid.memkind == MI_MEM_STATIC);

@@ -709,7 +709,7 @@ static void mi_arenas_unsafe_destroy(void) {
     else {
       new_max_arena = i;
     }
-    mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size, &_mi_stats_main);
+    _mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size);
   }
 }

@@ -752,13 +752,6 @@ bool _mi_arena_contains(const void* p) {
   the arena bitmaps.
 ----------------------------------------------------------- */
-// Maintain a count of all abandoned segments
-static mi_decl_cache_align _Atomic(size_t)abandoned_count;
-size_t _mi_arena_segment_abandoned_count(void) {
-  return mi_atomic_load_relaxed(&abandoned_count);
-}
 // reclaim a specific abandoned segment; `true` on success.
 // sets the thread_id.
 bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment )

@@ -768,7 +761,7 @@ bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment )
   // but we need to still claim it atomically -- we use the thread_id for that.
   size_t expected = 0;
   if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected, _mi_thread_id())) {
-    mi_atomic_decrement_relaxed(&abandoned_count);
+    mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count);
     return true;
   }
   else {

@@ -785,7 +778,7 @@ bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment )
   bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
   if (was_marked) {
     mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
-    mi_atomic_decrement_relaxed(&abandoned_count);
+    mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count);
     mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
   }
   // mi_assert_internal(was_marked);

@@ -802,9 +795,10 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
   mi_assert_internal(segment->used == segment->abandoned);
   if (segment->memid.memkind != MI_MEM_ARENA) {
     // not in an arena; count it as abandoned and return
-    mi_atomic_increment_relaxed(&abandoned_count);
+    mi_atomic_increment_relaxed(&segment->subproc->abandoned_count);
     return;
   }
+  // segment is in an arena
   size_t arena_idx;
   size_t bitmap_idx;
   mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);

@@ -812,17 +806,19 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
   mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
   mi_assert_internal(arena != NULL);
   const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
-  if (was_unmarked) { mi_atomic_increment_relaxed(&abandoned_count); }
+  if (was_unmarked) { mi_atomic_increment_relaxed(&segment->subproc->abandoned_count); }
   mi_assert_internal(was_unmarked);
   mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
 }
 // start a cursor at a randomized arena
-void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current) {
+void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current) {
+  mi_assert_internal(heap->tld->segments.subproc == subproc);
   const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
   current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
   current->count = 0;
   current->bitmap_idx = 0;
+  current->subproc = subproc;
 }
 // reclaim abandoned segments

@@ -830,7 +826,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* curre
 mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous )
 {
   const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count);
-  if (max_arena <= 0 || mi_atomic_load_relaxed(&abandoned_count) == 0) return NULL;
+  if (max_arena <= 0 || mi_atomic_load_relaxed(&previous->subproc->abandoned_count) == 0) return NULL;
   int count = previous->count;
   size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);

@@ -853,14 +849,24 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
       mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
       // try to reclaim it atomically
       if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) {
-        mi_atomic_decrement_relaxed(&abandoned_count);
-        previous->bitmap_idx = bitmap_idx;
-        previous->count = count;
         mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
         mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
         mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
-        //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
-        return segment;
+        // check that belongs to our sub-process
+        if (segment->subproc != previous->subproc) {
+          // it is from another subprocess, re-mark it and continue searching
+          const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
+          mi_assert_internal(was_zero);
+        }
+        else {
+          // success, we unabandoned a segment in our sub-process
+          mi_atomic_decrement_relaxed(&previous->subproc->abandoned_count);
+          previous->bitmap_idx = bitmap_idx;
+          previous->count = count;
+          //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
+          return segment;
+        }
       }
     }
   }

@@ -911,7 +917,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   const size_t bitmaps = (memid.is_pinned ? 3 : 5);
   const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
   mi_memid_t meta_memid;
-  mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
+  mi_arena_t* arena = (mi_arena_t*)_mi_arena_meta_zalloc(asize, &meta_memid);
   if (arena == NULL) return false;
   // already zero'd due to zalloc

View file

@@ -240,7 +240,8 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
 {
   // the segment is abandoned, try to reclaim it into our heap
   if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
-    mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
+    mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
+    mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
     mi_free(block); // recursively free as now it will be a local free in our heap
     return;
   }

View file

@@ -125,18 +125,20 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
 extern mi_heap_t _mi_heap_main;
-static mi_tld_t tld_main = {
+static mi_decl_cache_align mi_subproc_t mi_subproc_default;
+static mi_decl_cache_align mi_tld_t tld_main = {
   0, false,
   &_mi_heap_main, &_mi_heap_main,
   { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
-    0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, &mi_subproc_default,
     &tld_main.stats, &tld_main.os
   }, // segments
   { 0, &tld_main.stats }, // os
   { MI_STATS_NULL } // stats
 };
-mi_heap_t _mi_heap_main = {
+mi_decl_cache_align mi_heap_t _mi_heap_main = {
   &tld_main,
   MI_ATOMIC_VAR_INIT(NULL),
   0, // thread id

@@ -179,6 +181,38 @@ mi_heap_t* _mi_heap_main_get(void) {
 }
+/* -----------------------------------------------------------
+  Sub process
+----------------------------------------------------------- */
+mi_subproc_id_t mi_subproc_new(void) {
+  mi_memid_t memid = _mi_memid_none();
+  mi_subproc_t* subproc = (mi_subproc_t*)_mi_arena_meta_zalloc(sizeof(mi_subproc_t), &memid);
+  if (subproc == NULL) return NULL;
+  subproc->memid = memid;
+  return subproc;
+}
+mi_subproc_t* mi_subproc_from_id(mi_subproc_id_t subproc_id) {
+  return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id);
+}
+void mi_subproc_delete(mi_subproc_id_t subproc_id) {
+  if (subproc_id == NULL) return;
+  mi_subproc_t* subproc = mi_subproc_from_id(subproc_id);
+  _mi_arena_meta_free(subproc, subproc->memid, sizeof(mi_subproc_t));
+}
+void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
+  mi_heap_t* heap = mi_heap_get_default();
+  if (heap == NULL) return;
+  mi_assert(heap->tld->segments.subproc == &mi_subproc_default);
+  if (heap->tld->segments.subproc != &mi_subproc_default) return;
+  heap->tld->segments.subproc = mi_subproc_from_id(subproc_id);
+}
 /* -----------------------------------------------------------
   Initialization and freeing of the thread local heaps
 ----------------------------------------------------------- */

@@ -295,6 +329,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   _mi_memzero_aligned(tld,sizeof(mi_tld_t));
   tld->heap_backing = bheap;
   tld->heaps = NULL;
+  tld->segments.subproc = &mi_subproc_default;
   tld->segments.stats = &tld->stats;
   tld->segments.os = &tld->os;
   tld->os.stats = &tld->stats;

View file

@@ -628,7 +628,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
   segment->page_shift = page_shift;
   segment->segment_info_size = pre_size;
   segment->thread_id = _mi_thread_id();
   segment->cookie = _mi_ptr_cookie(segment);
+  segment->subproc = tld->subproc;
   // set protection
   mi_segment_protect(segment, true, tld->os);

@@ -880,6 +881,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
   if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
   // can be 0 still with abandoned_next, or already a thread id for segments outside an arena that are reclaimed on a free.
   mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id());
+  mi_assert_internal(segment->subproc == heap->tld->segments.subproc); // only reclaim within the same subprocess
   mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
   segment->abandoned_visits = 0;
   segment->was_reclaimed = true;

@@ -899,12 +901,13 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
       segment->abandoned--;
       mi_assert(page->next == NULL);
       _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
-      // set the heap again and allow heap thread delayed free again.
+      // get the target heap for this thread which has a matching heap tag (so we reclaim into a matching heap)
       mi_heap_t* target_heap = _mi_heap_by_tag(heap, page->heap_tag); // allow custom heaps to separate objects
       if (target_heap == NULL) {
         target_heap = heap;
-        _mi_error_message(EINVAL, "page with tag %u cannot be reclaimed by a heap with the same tag (using %u instead)\n", page->heap_tag, heap->tag );
+        _mi_error_message(EINVAL, "page with tag %u cannot be reclaimed by a heap with the same tag (using tag %u instead)\n", page->heap_tag, heap->tag );
       }
+      // associate the heap with this page, and allow heap thread delayed free again.
       mi_page_set_heap(page, target_heap);
       _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
       _mi_page_free_collect(page, false); // ensure used count is up to date

@@ -944,7 +947,8 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
 // attempt to reclaim a particular segment (called from multi threaded free `alloc.c:mi_free_block_mt`)
 bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
   if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned
-  // don't reclaim more from a free than half the current segments
+  if (segment->subproc != heap->tld->segments.subproc) return false; // only reclaim within the same subprocess
+  // don't reclaim more from a `free` call than half the current segments
   // this is to prevent a pure free-ing thread to start owning too many segments
   if (heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) return false;
   if (_mi_arena_segment_clear_abandoned(segment)) { // atomically unabandon

@@ -957,17 +961,17 @@ bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
 void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
   mi_segment_t* segment;
-  mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
+  mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
   while ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL) {
     mi_segment_reclaim(segment, heap, 0, NULL, tld);
   }
 }
-static long mi_segment_get_reclaim_tries(void) {
+static long mi_segment_get_reclaim_tries(mi_segments_tld_t* tld) {
   // limit the tries to 10% (default) of the abandoned segments with at least 8 and at most 1024 tries.
   const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100);
   if (perc <= 0) return 0;
-  const size_t total_count = _mi_arena_segment_abandoned_count();
+  const size_t total_count = mi_atomic_load_relaxed(&tld->subproc->abandoned_count);
   if (total_count == 0) return 0;
   const size_t relative_count = (total_count > 10000 ? (total_count / 100) * perc : (total_count * perc) / 100); // avoid overflow
   long max_tries = (long)(relative_count <= 1 ? 1 : (relative_count > 1024 ? 1024 : relative_count));
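As an aside, a stand-alone worked check of the clamp arithmetic above (not part of the commit; it restates the two lines above with the default mi_option_max_segment_reclaim of 10, and the function name is illustrative):

    #include <assert.h>
    #include <stddef.h>

    // restatement of the reclaim-tries clamp above (perc defaults to 10)
    static long reclaim_tries_sketch(size_t abandoned, size_t perc) {
      if (perc == 0 || abandoned == 0) return 0;
      const size_t rel = (abandoned > 10000 ? (abandoned / 100) * perc : (abandoned * perc) / 100);
      return (long)(rel <= 1 ? 1 : (rel > 1024 ? 1024 : rel));
    }

    int main(void) {
      assert(reclaim_tries_sketch(50000, 10) == 1024); // 10% of 50000 is 5000, clamped to 1024 tries
      assert(reclaim_tries_sketch(60, 10) == 6);       // 10% of 60
      assert(reclaim_tries_sketch(5, 10) == 1);        // never below 1 here (the "at least 8" adjustment follows in code not shown)
      return 0;
    }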
@@ -978,13 +982,14 @@ static long mi_segment_get_reclaim_tries(void) {
 static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, bool* reclaimed, mi_segments_tld_t* tld)
 {
   *reclaimed = false;
-  long max_tries = mi_segment_get_reclaim_tries();
+  long max_tries = mi_segment_get_reclaim_tries(tld);
   if (max_tries <= 0) return NULL;
   mi_segment_t* segment;
-  mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
+  mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
   while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
   {
+    mi_assert(segment->subproc == heap->tld->segments.subproc); // cursor only visits segments in our sub-process
     segment->abandoned_visits++;
     // todo: should we respect numa affinity for abondoned reclaim? perhaps only for the first visit?
     // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments and use many tries

@@ -1232,5 +1237,6 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
   mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
   // mi_segment_try_purge(tld);
   mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
+  mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc);
   return page;
 }