track abandoned segments in an arena bitmap instead of with a list

daanx 2024-02-29 14:27:28 -08:00
parent 7020ed5e52
commit bdda13b880
4 changed files with 122 additions and 176 deletions
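Conceptually, the change replaces the global lock-free abandoned list with a per-arena `blocks_abandoned` bitmap: abandoning a segment sets the bit of its first block, and reclaiming atomically clears it so exactly one thread wins ownership. A minimal standalone sketch of that idea (illustrative only; mimalloc's actual `_mi_bitmap_claim`/`_mi_bitmap_unclaim` operate on bitmap indices and can span multiple bits):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define FIELD_BITS (8*sizeof(uintptr_t))

typedef _Atomic(uintptr_t) bitmap_field_t;

// mark abandoned: set the bit; returns true if it was previously clear
static bool bitmap_mark(bitmap_field_t* fields, size_t bit) {
  const uintptr_t mask = (uintptr_t)1 << (bit % FIELD_BITS);
  const uintptr_t prev = atomic_fetch_or(&fields[bit / FIELD_BITS], mask);
  return ((prev & mask) == 0);
}

// try to reclaim: clear the bit; only the single thread that actually
// flips it from 1 to 0 gets `true` and thereby owns the segment
static bool bitmap_try_claim(bitmap_field_t* fields, size_t bit) {
  const uintptr_t mask = (uintptr_t)1 << (bit % FIELD_BITS);
  const uintptr_t prev = atomic_fetch_and(&fields[bit / FIELD_BITS], ~mask);
  return ((prev & mask) != 0);
}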


@ -124,6 +124,10 @@ bool _mi_arena_contains(const void* p);
void _mi_arena_collect(bool force_purge, mi_stats_t* stats);
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);
bool _mi_arena_segment_clear_abandoned(mi_memid_t memid);
void _mi_arena_segment_mark_abandoned(mi_memid_t memid);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_id_t* current_id, size_t* current_idx);
// "segment-map.c"
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);


@ -373,7 +373,6 @@ typedef struct mi_segment_s {
size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE`
// segment fields
_Atomic(struct mi_segment_s*) abandoned_next;
struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init`
struct mi_segment_s* prev;


@ -55,6 +55,7 @@ typedef struct mi_arena_s {
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses APIs but it is convenient to have here)
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
} mi_arena_t;
@ -727,6 +728,89 @@ bool _mi_arena_contains(const void* p) {
return false;
}
/* -----------------------------------------------------------
Abandoned blocks/segments.
This is used to atomically abandon/reclaim segments
(and crosses the arena API but it is convenient to have here).
Abandoned segments still have live blocks; they get reclaimed
when a thread frees a block in one, or when a thread needs a fresh
segment; such threads find abandoned segments by scanning
the arena bitmaps.
----------------------------------------------------------- */
// reclaim a specific abandoned segment; `true` on success.
bool _mi_arena_segment_clear_abandoned(mi_memid_t memid )
{
if (memid.memkind != MI_MEM_ARENA) return true; // not in an arena, consider it un-abandoned
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx);
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
mi_assert_internal(arena != NULL);
bool was_abandoned = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
mi_assert_internal(was_abandoned);
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return was_abandoned;
}
// mark a specific segment as abandoned
void _mi_arena_segment_mark_abandoned(mi_memid_t memid)
{
if (memid.memkind != MI_MEM_ARENA) return; // not in an arena
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx);
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
mi_assert_internal(arena != NULL);
const bool was_unset = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
mi_assert_internal(was_unset);
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
}
// reclaim the next abandoned segment (advancing the cursor across all arenas); returns NULL if none is found
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_id_t* previous_id, size_t* previous_idx )
{
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
int arena_idx = *previous_id;
size_t field_idx = mi_bitmap_index_field(*previous_idx);
size_t bit_idx = mi_bitmap_index_bit_in_field(*previous_idx) + 1;
// visit the arenas (starting from the previous one)
for( ; arena_idx < max_arena; arena_idx++, field_idx = 0, bit_idx = 0) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
if (arena != NULL) {
// visit the abandoned fields (starting at previous_idx)
for ( ; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
mi_bitmap_field_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
if mi_unlikely(field != 0) { // skip zero fields quickly
// visit each set bit in the field (todo: maybe use `ctz` here?)
for ( ; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
// pre-check if the bit is set
mi_bitmap_field_t mask = ((mi_bitmap_field_t)1 << bit_idx);
if mi_unlikely((field & mask) == mask) {
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
// try to reclaim it atomically
if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) {
*previous_idx = bitmap_idx;
*previous_id = arena_idx;
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
}
}
}
}
}
}
}
// no more found
*previous_idx = 0;
*previous_id = 0;
return NULL;
}
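A usage sketch of the cursor protocol (this mirrors `_mi_abandoned_reclaim_all` further down in this commit): start with a zero arena id and bitmap index and keep calling until NULL is returned; each returned segment had its abandoned bit cleared atomically, so the caller owns it.

mi_arena_id_t current_id = 0;
size_t current_idx = 0;
mi_segment_t* segment;
while ((segment = _mi_arena_segment_clear_abandoned_next(&current_id, &current_idx)) != NULL) {
  // this thread won the atomic clear, so it may reclaim (or free) the segment;
  // if it decides not to use it, it must mark it abandoned again:
  //   _mi_arena_segment_mark_abandoned(segment->memid);
}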
/* -----------------------------------------------------------
Add an arena.
@ -760,13 +844,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
const size_t bitmaps = (memid.is_pinned ? 2 : 4);
const size_t bitmaps = (memid.is_pinned ? 3 : 5);
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
mi_memid_t meta_memid;
mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
if (arena == NULL) return false;
// already zero'd due to os_alloc
// already zero'd due to zalloc
// _mi_memzero(arena, asize);
arena->id = _mi_arena_id_none();
arena->memid = memid;
@ -780,9 +864,11 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
arena->is_large = is_large;
arena->purge_expire = 0;
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
// consecutive bitmaps
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after abandoned bitmap
arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[4*fields]); // just after committed bitmap
// initialize committed bitmap?
if (arena->blocks_committed != NULL && arena->memid.initially_committed) {
memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
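Taken together, the bitmap count and pointer setup above give the following consecutive layout after the `mi_arena_t` header (a summary of the hunks above, not new code):

// blocks_inuse     = &blocks_inuse[0]           (always present)
// blocks_dirty     = &blocks_inuse[1*fields]    (always present)
// blocks_abandoned = &blocks_inuse[2*fields]    (new in this commit)
// blocks_committed = &blocks_inuse[3*fields]    (NULL when memid.is_pinned)
// blocks_purge     = &blocks_inuse[4*fields]    (NULL when memid.is_pinned)
//
// hence bitmaps = (is_pinned ? 3 : 5) and
// asize = sizeof(mi_arena_t) + bitmaps*fields*sizeof(mi_bitmap_field_t)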


@ -587,9 +587,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
if (segment == NULL) return NULL;
mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
mi_assert_internal(segment->memid.is_pinned ? segment->memid.initially_committed : true);
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan
// zero the segment info (but not the `mem` fields)
ptrdiff_t ofs = offsetof(mi_segment_t, next);
_mi_memzero((uint8_t*)segment + ofs, info_size - ofs);
@ -743,171 +741,25 @@ Abandonment
When threads terminate, they can leave segments with
live blocks (reached through other threads). Such segments
are "abandoned" and will be reclaimed by other threads to
reuse their pages and/or free them eventually
reuse their pages and/or free them eventually. The
`thread_id` of such segments is 0.
We maintain a global list of abandoned segments that are
reclaimed on demand. Since this is shared among threads
the implementation needs to avoid the A-B-A problem on
popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem>
We use tagged pointers to avoid accidentally identifying
reused segments, much like stamped references in Java.
Secondly, we maintain a reader counter to avoid resetting
or decommitting segments that have a pending read operation.
When a block is freed in an abandoned segment, the segment
is reclaimed into that thread.
Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the regions. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the regions.
Moreover, if threads are looking for a fresh segment, they
will first consider abandoned segments -- these can be found
by scanning the arena memory
(segments outside arena memory are only reclaimed by a free).
----------------------------------------------------------- */
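For the free-path reclaim mentioned above, a hypothetical sketch (the wrapper name and exact call site are illustrative assumptions; only `_mi_arena_segment_clear_abandoned` and `mi_segment_reclaim` come from this commit):

static void example_reclaim_on_free(mi_segment_t* segment, mi_heap_t* heap, mi_segments_tld_t* tld) {
  mi_assert_internal(segment->thread_id == 0);   // the segment is abandoned
  if (_mi_arena_segment_clear_abandoned(segment->memid)) {
    // the abandoned bit was cleared atomically (or the segment is not arena
    // memory), so this thread now owns the segment and can adopt its pages
    mi_atomic_decrement_relaxed(&abandoned_count);
    mi_segment_reclaim(segment, heap, 0, NULL, tld);
  }
  // otherwise another thread raced us and will reclaim the segment instead
}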
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// to put in a tag that increments on update to avoid the A-B-A problem.
#define MI_TAGGED_MASK MI_SEGMENT_MASK
typedef uintptr_t mi_tagged_segment_t;
// Maintain these for debug purposes
static mi_decl_cache_align _Atomic(size_t) abandoned_count;
static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
}
static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
return ((uintptr_t)segment | tag);
}
// This is a list of visited abandoned pages that were full at the time.
// This list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL
// The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// Maintain these for debug purposes (these counts may be a bit off)
static mi_decl_cache_align _Atomic(size_t) abandoned_count;
static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count;
// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
static mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0
// Push on the visited list
static void mi_abandoned_visited_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL);
mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_internal(segment->used > 0);
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited);
do {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext);
} while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment));
mi_atomic_increment_relaxed(&abandoned_visited_count);
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
{
// quick check if the visited list is empty
if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false;
// grab the whole visited list
mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL);
if (first == NULL) return false;
// first try to swap directly if the abandoned list happens to be NULL
mi_tagged_segment_t afirst;
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
if (mi_tagged_segment_ptr(ts)==NULL) {
size_t count = mi_atomic_load_relaxed(&abandoned_visited_count);
afirst = mi_tagged_segment(first, ts);
if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) {
mi_atomic_add_relaxed(&abandoned_count, count);
mi_atomic_sub_relaxed(&abandoned_visited_count, count);
return true;
}
}
// find the last element of the visited list: O(n)
mi_segment_t* last = first;
mi_segment_t* next;
while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) {
last = next;
}
// and atomically prepend to the abandoned list
// (no need to increase the readers as we don't access the abandoned segments)
mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned);
size_t count;
do {
count = mi_atomic_load_relaxed(&abandoned_visited_count);
mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
afirst = mi_tagged_segment(first, anext);
} while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst));
mi_atomic_add_relaxed(&abandoned_count, count);
mi_atomic_sub_relaxed(&abandoned_visited_count, count);
return true;
}
// Push on the abandoned list.
static void mi_abandoned_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_internal(segment->used > 0);
mi_tagged_segment_t next;
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
do {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts));
next = mi_tagged_segment(segment, ts);
} while (!mi_atomic_cas_weak_release(&abandoned, &ts, next));
mi_atomic_increment_relaxed(&abandoned_count);
}
// Wait until there are no more pending reads on segments that used to be in the abandoned list
// legacy: Wait until there are no more pending reads on segments that used to be in the abandoned list
void _mi_abandoned_await_readers(void) {
size_t n;
do {
n = mi_atomic_load_acquire(&abandoned_readers);
if (n != 0) mi_atomic_yield();
} while (n != 0);
}
// Pop from the abandoned list
static mi_segment_t* mi_abandoned_pop(void) {
mi_segment_t* segment;
// Check efficiently if it is empty (or if the visited list needs to be moved)
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if mi_likely(segment == NULL) {
if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL
return NULL;
}
}
// Do a pop. We use a reader count to prevent
// a segment to be decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `region.c:_mi_mem_free` for example)
mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0;
ts = mi_atomic_load_acquire(&abandoned);
do {
segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) {
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next);
next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
}
} while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next));
mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock
if (segment != NULL) {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
mi_atomic_decrement_relaxed(&abandoned_count);
}
return segment;
// nothing needed
}
/* -----------------------------------------------------------
@ -917,7 +769,6 @@ static mi_segment_t* mi_abandoned_pop(void) {
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
// remove the segment from the free page queue if needed
@ -931,8 +782,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_segments_track_size(-((long)segment->segment_size), tld);
segment->thread_id = 0;
segment->abandoned_visits = 0;
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
mi_abandoned_push(segment);
_mi_arena_segment_mark_abandoned(segment->memid); mi_atomic_increment_relaxed(&abandoned_count);
}
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -995,7 +845,6 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool
// Reclaim a segment; returns NULL if the segment was freed
// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
segment->thread_id = _mi_thread_id();
@ -1056,7 +905,10 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
mi_segment_t* segment;
while ((segment = mi_abandoned_pop()) != NULL) {
mi_arena_id_t current_id = 0;
size_t current_idx = 0;
while ((segment = _mi_arena_segment_clear_abandoned_next(&current_id, &current_idx)) != NULL) {
mi_atomic_decrement_relaxed(&abandoned_count);
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
}
@ -1065,8 +917,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
{
*reclaimed = false;
mi_segment_t* segment;
long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
mi_arena_id_t current_id = 0;
size_t current_idx = 0;
long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 1024); // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current_id, &current_idx)) != NULL))
{
mi_atomic_decrement_relaxed(&abandoned_count);
segment->abandoned_visits++;
// todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments
// and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way?
@ -1092,9 +948,10 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else {
// otherwise, push on the visited list so it is not looked at again too quickly
// otherwise, mark it back as abandoned
// todo: reset delayed pages in the segment?
mi_abandoned_visited_push(segment);
mi_atomic_increment_relaxed(&abandoned_count);
_mi_arena_segment_mark_abandoned(segment->memid);
}
}
return NULL;