wip: cannot compile

daanx 2024-12-01 16:26:59 -08:00
parent 1d7a9f62a5
commit 2f789aae9a
5 changed files with 181 additions and 77 deletions

View file

@@ -92,8 +92,10 @@ bool _mi_preloading(void); // true while the C runtime is not in
void _mi_thread_done(mi_heap_t* heap);
void _mi_thread_data_collect(void);
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
size_t _mi_thread_seq_id(void) mi_attr_noexcept;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);
void _mi_heap_guarded_init(mi_heap_t* heap);
@@ -180,8 +182,6 @@ void _mi_heap_delayed_free_all(mi_heap_t* heap);
bool _mi_heap_delayed_free_partial(mi_heap_t* heap);
void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
void _mi_deferred_free(mi_heap_t* heap, bool force);
@@ -426,6 +426,10 @@ static inline uintptr_t _mi_ptr_cookie(const void* p) {
return ((uintptr_t)p ^ _mi_heap_main.cookie);
}
static inline mi_tld_t* _mi_tld(void) {
return mi_heap_get_default()->tld;
}
/* -----------------------------------------------------------
Pages
----------------------------------------------------------- */
@@ -507,53 +511,53 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
return mi_page_block_size(page) - MI_PADDING_SIZE;
}
//static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
// mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
// if (heap != NULL) {
// mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
// page->heap_tag = heap->tag;
// mi_atomic_store_release(&page->xthread_id, heap->thread_id);
// }
// else {
// mi_atomic_store_release(&page->xheap, (uintptr_t)mi_page_heap(page)->tld->subproc);
// mi_atomic_store_release(&page->xthread_id,0);
// }
//}
// Thread free flag helpers
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
return (mi_block_t*)(tf & ~1);
}
static inline bool mi_tf_is_owned(mi_thread_free_t tf) {
return ((tf & 1) == 0);
}
static inline mi_thread_free_t mi_tf_create(mi_block_t* block, bool owned) {
return (mi_thread_free_t)((uintptr_t)block | (owned ? 0 : 1));
}
// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
return mi_tf_block(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
}
static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
}
// Heap access
static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
// Owned?
static inline bool mi_page_is_owned(const mi_page_t* page) {
return mi_tf_is_owned(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
}
// Thread id of thread that owns this page
static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
return mi_atomic_load_relaxed(&page->xthread_id);
}
static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
if (heap != NULL) {
mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
page->heap_tag = heap->tag;
mi_atomic_store_release(&page->xthread_id, heap->thread_id);
}
else {
mi_atomic_store_release(&page->xheap, (uintptr_t)mi_page_heap(page)->tld->subproc);
mi_atomic_store_release(&page->xthread_id,0);
}
}
// Thread free flag helpers
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
return (mi_block_t*)(tf & ~0x03);
}
static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
return (mi_delayed_t)(tf & 0x03);
}
static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed);
}
static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
return mi_tf_make(mi_tf_block(tf),delayed);
}
static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
return mi_tf_make(block, mi_tf_delayed(tf));
}
//static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
// return mi_tf_make(mi_tf_block(tf),delayed);
//}
//static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
// return mi_tf_make(block, mi_tf_delayed(tf));
//}
// are all blocks in a page freed?
// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
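The body of the function this note documents falls outside the hunk shown here. As a hedged sketch only (the name `mi_page_all_free_sketch` is invented, and it assumes the page keeps a `used` block count as referenced in `_mi_free_delayed_block` and `_mi_page_free_collect` below), the check it describes amounts to:

static inline bool mi_page_all_free_sketch(const mi_page_t* page) {
  mi_assert_internal(page != NULL);
  return (page->used == 0);  // all blocks are free once no block is in use
}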

View file

@@ -216,13 +216,14 @@ typedef struct mi_block_s {
#endif
// The delayed flags are used for efficient multi-threaded free-ing
typedef enum mi_delayed_e {
MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
MI_NEVER_DELAYED_FREE = 3 // sticky: used for abandoned pages without an owning heap; this only resets on page reclaim
} mi_delayed_t;
// The owned flags are used for efficient multi-threaded free-ing
// When we push on the page thread free queue of an abandoned page,
// we also atomically get to own it. This is needed to atomically
// abandon a page (while other threads could concurrently free blocks in it).
typedef enum mi_owned_e {
MI_OWNED = 0, // some heap owns this page
MI_ABANDONED = 1, // the page is abandoned
} mi_owned_t;
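For illustration only (not code from this commit): with this encoding the owner can abandon a page by atomically setting the bottom bit of `xthread_free` while leaving the block list intact; a remote free that later pushes a block with the bit cleared (see `mi_free_block_delayed_mt` in the last file of this commit) then observes the old abandoned value and takes over ownership. The helper name `mi_page_abandon_sketch` is invented here.

static inline void mi_page_abandon_sketch(mi_page_t* page) {
  mi_thread_free_t tf_new;
  mi_thread_free_t tf;
  do {
    tf = mi_atomic_load_relaxed(&page->xthread_free);
    mi_assert_internal(mi_tf_is_owned(tf));          // only the owner abandons a page
    tf_new = mi_tf_create(mi_tf_block(tf), false);   // same block list, bottom bit set = MI_ABANDONED
  } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf, tf_new));
}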
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
@@ -247,7 +248,7 @@ typedef union mi_page_flags_s {
#endif
// Thread free list.
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
// We use the bottom bit of the pointer for `mi_owned_t` flags
typedef uintptr_t mi_thread_free_t;
// Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython)
@@ -304,10 +305,11 @@ typedef struct mi_page_s {
#endif
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(uintptr_t) xheap; // heap this page belongs to.
// _Atomic(uintptr_t) xheap; // heap this page belongs to.
struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
mi_subproc_t* subproc; // sub-process of this heap
mi_memid_t memid; // provenance of the page memory
} mi_page_t;

View file

@@ -693,3 +693,48 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t
mi_bitmap_forall_set_chunks_end();
return false;
}
mi_decl_nodiscard bool mi_pairmap_xset(mi_pair_t set, mi_pairmap_t* pairmap, size_t idx);
mi_decl_nodiscard bool mi_pairmap_xset_while_not_busy(mi_pair_t set, mi_pairmap_t* pairmap, size_t idx);
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx) {
  size_t set_idx;
  size_t start = tseq % MI_BFIELD_BITS;
  size_t epoch = mi_atomic_load_acquire(&pairmap->epoch); MI_UNUSED(epoch); // not used yet
  mi_bfield_t any_set = mi_bfield_rotate_right(mi_atomic_load_relaxed(&pairmap->any_set), start);
  while (mi_bfield_find_least_bit(any_set, &set_idx)) {
    // each bit in `any_set` covers a pair of chunks
    size_t chunk_idx = 2*((set_idx + start) % MI_BFIELD_BITS);
    {
      // look at chunk_idx and chunk_idx+1
      mi_bitmap_chunk_t* chunk1 = &pairmap->chunks[chunk_idx];
      mi_bitmap_chunk_t* chunk2 = &pairmap->chunks[chunk_idx+1];
      size_t cidx;
      if (mi_pairmap_chunk_find_and_set_busy(chunk1, &cidx)) {
        *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
        mi_assert_internal(*pidx < MI_PAIRMAP_MAX_BITS);
        return true;
      }
      else if (mi_pairmap_chunk_find_and_set_busy(chunk2, &cidx)) {
        *pidx = ((chunk_idx+1) * MI_BITMAP_CHUNK_BITS) + cidx;
        mi_assert_internal(*pidx < MI_PAIRMAP_MAX_BITS);
        return true;
      }
      else if (mi_bitmap_chunk_all_are_clear(chunk1) && mi_bitmap_chunk_all_are_clear(chunk2)) {
        // both chunks are clear: clear the corresponding bit in `any_set`
        mi_bfield_atomic_xset(MI_BIT_CLEAR, &pairmap->any_set, chunk_idx/2);
      }
    }
    start += set_idx+1;   /* so chunk_idx stays valid */
    any_set >>= set_idx;  /* skip scanned bits (and avoid UB with (idx+1)) */
    any_set >>= 1;
  }
  return false;
}

View file

@@ -41,7 +41,7 @@ typedef mi_decl_align(32) struct mi_bitmap_s {
#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
/* --------------------------------------------------------------------------------
Bitmap
Atomic bitmap
-------------------------------------------------------------------------------- */
typedef bool mi_bit_t;
@@ -89,4 +89,30 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx );
/* --------------------------------------------------------------------------------
Atomic bitmap for a pair of bits
-------------------------------------------------------------------------------- */
typedef mi_bfield_t mi_pair_t;
#define MI_PAIR_CLEAR (0)
#define MI_PAIR_BUSY (1)
#define MI_PAIR_BUSYX (2)
#define MI_PAIR_SET (3)
typedef mi_decl_align(32) struct mi_pairmap_s {
mi_bitmap_chunk_t chunks[2*MI_BFIELD_BITS];
_Atomic(mi_bfield_t) any_set;
_Atomic(size_t) epoch;
} mi_pairmap_t;
#define MI_PAIRMAP_MAX_PAIRS (MI_BITMAP_MAX_BITS) // 16k pairs on 64bit, 8k pairs on 32bit
#define MI_PAIRMAP_MAX_BITS (2*MI_PAIRMAP_MAX_PAIRS)
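As an illustration of the intended encoding (not code from this commit, and assuming pair `i` occupies the two adjacent bits `2*i` and `2*i+1` of the underlying bitmap, consistent with `MI_PAIRMAP_MAX_BITS == 2*MI_PAIRMAP_MAX_PAIRS`), reading a pair's state from raw bit fields could look like this; `mi_pair_get_sketch` is a made-up helper:

static inline mi_pair_t mi_pair_get_sketch(const mi_bfield_t* bfields, size_t pair_idx) {
  const size_t bit_idx   = 2*pair_idx;                       // first bit of the pair
  const size_t field_idx = bit_idx / MI_BFIELD_BITS;         // bfield that holds the pair
  const size_t shift     = bit_idx % MI_BFIELD_BITS;         // offset within that bfield; pairs start at
                                                             // even bits so they never straddle a bfield
  return (mi_pair_t)((bfields[field_idx] >> shift) & 0x03);  // MI_PAIR_CLEAR .. MI_PAIR_SET
}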
mi_decl_nodiscard bool mi_pairmap_xset(mi_pair_t set, mi_pairmap_t* pairmap, size_t idx);
mi_decl_nodiscard bool mi_pairmap_xset_while_not_busy(mi_pair_t set, mi_pairmap_t* pairmap, size_t idx);
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx);
#endif // MI_XBITMAP_H

View file

@@ -147,39 +147,66 @@ void mi_free(void* p) mi_attr_noexcept
}
}
// return true if successful
bool _mi_free_delayed_block(mi_block_t* block) {
// get segment and page
mi_assert_internal(block!=NULL);
mi_page_t* const page = mi_checked_ptr_page(block,"_mi_free_delayed_block");
mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
// Clear the no-delayed flag so delayed freeing is used again for this page.
// This must be done before collecting the free lists on this page -- otherwise
// some blocks may end up in the page `thread_free` list with no blocks in the
// heap `thread_delayed_free` list which may cause the page to be never freed!
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) {
return false;
}
// collect all other non-local frees (move from `thread_free` to `free`) to ensure up-to-date `used` count
_mi_page_free_collect(page, false);
// and free the block (possibly freeing the page as well since `used` is updated)
mi_free_block_local(page, block, false /* stats have already been adjusted */, true /* check for a full page */);
return true;
}
// ------------------------------------------------------
// Multi-threaded Free (`_mt`)
// ------------------------------------------------------
// Push a block that is owned by another thread on its page-local thread free
// list or its heap delayed free list. Such blocks are later collected by
// the owning thread in `_mi_free_delayed_block`.
static void mi_decl_noinline mi_free_try_reclaim_mt(mi_page_t* page) {
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_thread_id(page)==0);
// we own the page now..
// first remove it from the abandoned pages in the arena
mi_heap_t* const heap = mi_heap_get_default();
_mi_arena_page_unabandon(page,heap->tld);
// collect the thread atomic free list
_mi_page_free_collect(page, false); // update `used` count
if (mi_page_is_singleton(page)) mi_assert_internal(mi_page_all_free(page));
if (mi_page_all_free(page)) {
// we can free the page directly
_mi_arena_page_free(page, heap->tld);
}
else {
// the page has still some blocks in use
// reclaim in our heap if compatible, or otherwise abandon again
if ((_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0) &&
(mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) && // we did not already terminate our thread (can this happen? yes, due to thread-local destructors for example (issue #944))
(page->subproc == heap->tld->subproc) && // don't reclaim across sub-processes
mi_arena_page_try_reclaim(page) // and we can reclaim it from the arena
)
{
// make it part of our heap
_mi_heap_page_reclaim(heap, page);
}
else {
// abandon again
_mi_arena_page_abandon(page, heap->tld);
}
}
}
// Push a block that is owned by another thread on its page-local thread free list.
static void mi_decl_noinline mi_free_block_delayed_mt(mi_page_t* page, mi_block_t* block)
{
// push atomically on the page thread free list
mi_thread_free_t tf_new;
mi_thread_free_t tf;
do {
tf = mi_atomic_load_relaxed(&page->xthread_free);
mi_block_set_next(page, block, mi_tf_block(tf));
tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf, tf_new));
// and atomically reclaim the page if it was abandoned
bool reclaimed = !mi_tf_is_owned(tf);
if (reclaimed) mi_free_try_reclaim_mt(page);
}
/*
// Try to put the block on either the page-local thread free list,
// or the heap delayed free list (if this is the first non-local free in that page)
mi_thread_free_t tfreex;
@@ -276,7 +303,7 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block
// thread_delayed free list (or heap delayed free list)
mi_free_block_delayed_mt(page,block);
}
*/
// ------------------------------------------------------
// Usable size