wip: initial version with eager abandonment

Author: daanx
Date: 2024-12-02 16:01:45 -08:00
Commit: d96c134566
14 changed files with 1127 additions and 419 deletions

View file

@ -379,6 +379,7 @@ typedef enum mi_option_e {
mi_option_guarded_sample_rate, // 1 out of N allocations in the min/max range will be guarded (=1000)
mi_option_guarded_sample_seed, // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0)
mi_option_target_segments_per_thread, // experimental (=0)
mi_option_eager_abandon, // eagerly abandon pages from the heap if suitable (to reduce memory footprint in multi-threaded code)
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
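As a side note (not part of this diff), the new option could be enabled like any other mimalloc option; a minimal sketch, assuming the standard `mi_option_enable` API and the usual `MIMALLOC_<OPTION>` environment-variable naming convention:

#include <mimalloc.h>

int main(void) {
  // enable eager abandonment before allocating (equivalent to setting the
  // environment variable MIMALLOC_EAGER_ABANDON=1, following the usual naming scheme)
  mi_option_enable(mi_option_eager_abandon);
  void* p = mi_malloc(64);
  mi_free(p);
  return 0;
}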

View file

@ -145,20 +145,13 @@ typedef int32_t mi_ssize_t;
size_t _mi_clz_generic(size_t x);
size_t _mi_ctz_generic(size_t x);
uint32_t _mi_ctz_generic32(uint32_t x);
static inline size_t mi_ctz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0
uint64_t r;
__asm volatile ("tzcnt\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_ARM64
uint64_t r;
__asm volatile ("rbit\t%0, %1\n\tclz\t%0, %0" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_RISCV
size_t r;
__asm volatile ("ctz\t%0, %1" : "=&r"(r) : "r"(x) : );
return r;
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_tzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
@ -168,6 +161,17 @@ static inline size_t mi_ctz(size_t x) {
#else
return (_BitScanForward64(&idx, x) ? (size_t)idx : 64);
#endif
/*
// for arm64 and riscv, the builtin_ctz is defined for 0 as well
#elif defined(__GNUC__) && MI_ARCH_ARM64
uint64_t r;
__asm volatile ("rbit\t%0, %1\n\tclz\t%0, %0" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_RISCV
size_t r;
__asm volatile ("ctz\t%0, %1" : "=&r"(r) : "r"(x) : );
return r;
*/
#elif mi_has_builtin_size(ctz)
return (x!=0 ? (size_t)mi_builtin_size(ctz)(x) : MI_SIZE_BITS);
#else
@ -177,18 +181,10 @@ static inline size_t mi_ctz(size_t x) {
}
static inline size_t mi_clz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0
uint64_t r;
__asm volatile ("lzcnt\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_ARM64
uint64_t r;
__asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_RISCV
size_t r;
__asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : );
return r;
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_lzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
@ -198,6 +194,17 @@ static inline size_t mi_clz(size_t x) {
#else
return (_BitScanReverse64(&idx, x) ? 63 - (size_t)idx : 64);
#endif
/*
// for arm64 and riscv, the builtin_clz is defined for 0 as well
#elif defined(__GNUC__) && MI_ARCH_ARM64
uint64_t r;
__asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_RISCV
size_t r;
__asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : );
return r;
*/
#elif mi_has_builtin_size(clz)
return (x!=0 ? (size_t)mi_builtin_size(clz)(x) : MI_SIZE_BITS);
#else
@ -206,6 +213,26 @@ static inline size_t mi_clz(size_t x) {
#endif
}
static inline uint32_t mi_ctz32(uint32_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0
uint32_t r;
__asm volatile ("tzcntl\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
return r;
#elif MI_ARCH_X64 && defined(__BMI1__)
return (uint32_t)_tzcnt_u32(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (_BitScanForward(&idx, x) ? (uint32_t)idx : 32);
#elif mi_has_builtin(ctz) && (INT_MAX == INT32_MAX)
return (x!=0 ? (uint32_t)mi_builtin(ctz)(x) : 32);
#elif mi_has_builtin(ctzl) && (LONG_MAX == INT32_MAX)
return (x!=0 ? (uint32_t)mi_builtin(ctzl)(x) : 32);
#else
#define MI_HAS_FAST_BITSCAN 0
return _mi_ctz_generic32(x);
#endif
}
#ifndef MI_HAS_FAST_BITSCAN
#define MI_HAS_FAST_BITSCAN 1
#endif
@ -229,6 +256,22 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
#endif
}
// Bit scan forward: find the least significant bit that is set (i.e. count trailing zeros)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsf32(uint32_t x, uint32_t* idx) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ("tzcntl\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
return !is_zero;
#else
*idx = mi_ctz32(x);
return (x!=0);
#endif
}
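A small usage sketch (not part of the diff) of the new `mi_bsf32`, iterating the set bits of a 32-bit mask from low to high:

static void visit_set_bits32(uint32_t mask) {
  uint32_t idx;
  while (mi_bsf32(mask, &idx)) {  // returns false once mask == 0
    // ... use bit index `idx` ...
    mask &= (mask - 1);           // clear the lowest set bit
  }
}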
// Bit scan reverse: find the most significant bit that is set
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
@ -248,29 +291,6 @@ static inline bool mi_bsr(size_t x, size_t* idx) {
}
/* --------------------------------------------------------------------------------
find least/most significant bit position
-------------------------------------------------------------------------------- */
// Find most significant bit index, or MI_SIZE_BITS if 0
static inline size_t mi_find_msb(size_t x) {
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
#if MI_SIZE_BITS==32
return (_BitScanReverse(&i, x) ? i : 32);
#else
return (_BitScanReverse64(&i, x) ? i : 64);
#endif
#else
return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
#endif
}
// Find least significant bit index, or MI_SIZE_BITS if 0 (this equals `mi_ctz`, count trailing zero's)
static inline size_t mi_find_lsb(size_t x) {
return mi_ctz(x);
}
/* --------------------------------------------------------------------------------
rotate
@ -288,13 +308,26 @@ static inline size_t mi_rotr(size_t x, size_t r) {
return _rotr64(x,(int)r);
#endif
#else
// The term `(-rshift)&(MI_BFIELD_BITS-1)` is written instead of `MI_BFIELD_BITS - rshift` to
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
return ((x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1))));
#endif
}
static inline uint32_t mi_rotr32(uint32_t x, uint32_t r) {
#if mi_has_builtin(rotateright32)
return mi_builtin(rotateright32)(x, r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return _lrotr(x, (int)r);
#else
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & 31;
return ((x >> rshift) | (x << ((-rshift) & 31)));
#endif
}
static inline size_t mi_rotl(size_t x, size_t r) {
#if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64)
return mi_builtin(rotateleft64)(x,r);
@ -307,7 +340,7 @@ static inline size_t mi_rotl(size_t x, size_t r) {
return _rotl64(x,(int)r);
#endif
#else
// The term `(-rshift)&(MI_BFIELD_BITS-1)` is written instead of `MI_BFIELD_BITS - rshift` to
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
return ((x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1))));
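To illustrate the comment about avoiding UB (a sketch, not from the diff), consider a 32-bit rotate:

// r == 0:
//   naive : (x >> 0) | (x << (32 - 0))      -- x << 32 is undefined behavior for a 32-bit type
//   masked: (x >> 0) | (x << ((-0) & 31))   -- (-0) & 31 == 0, so the expression is just x
// r in 1..31:
//   (-r) & 31 == 32 - r, so the masked form computes the same rotation as the naive one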

View file

@ -141,8 +141,10 @@ void _mi_arenas_collect(bool force_purge, mi_stats_t* stats);
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);
mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment);
void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld);
void _mi_arena_page_free(mi_page_t* page, mi_tld_t* tld);
void _mi_arena_page_free(mi_page_t* page);
void _mi_arena_page_abandon(mi_page_t* page);
void _mi_arena_page_unabandon(mi_page_t* page);
bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page);
void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap);
@ -174,19 +176,19 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t hu
void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
void _mi_page_unfull(mi_page_t* page);
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq); // free the page
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
void _mi_page_force_abandon(mi_page_t* page);
void _mi_heap_delayed_free_all(mi_heap_t* heap);
bool _mi_heap_delayed_free_partial(mi_heap_t* heap);
// void _mi_heap_delayed_free_all(mi_heap_t* heap);
// bool _mi_heap_delayed_free_partial(mi_heap_t* heap);
void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
void _mi_deferred_free(mi_heap_t* heap, bool force);
void _mi_page_free_collect(mi_page_t* page,bool force);
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments
// void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments
void _mi_page_init(mi_heap_t* heap, mi_page_t* page);
size_t _mi_bin_size(uint8_t bin); // for stats
@ -202,6 +204,7 @@ void _mi_heap_unsafe_destroy_all(void);
mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag);
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page);
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg);
void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page);
// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
@ -511,6 +514,24 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
return mi_page_block_size(page) - MI_PADDING_SIZE;
}
static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
return page->heap;
}
static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
if (heap != NULL) {
// mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
page->heap = heap;
page->heap_tag = heap->tag;
mi_atomic_store_release(&page->xthread_id, heap->thread_id);
}
else {
// mi_atomic_store_release(&page->xheap, (uintptr_t)heap->tld->subproc);
page->heap = NULL;
mi_atomic_store_release(&page->xthread_id,0);
}
}
//static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
// mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
// if (heap != NULL) {
@ -529,13 +550,18 @@ static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
return (mi_block_t*)(tf & ~1);
}
static inline bool mi_tf_is_owned(mi_thread_free_t tf) {
return ((tf & 1) == 0);
return ((tf & 1) == 1);
}
static inline mi_thread_free_t mi_tf_create(mi_block_t* block, bool owned) {
return (mi_thread_free_t)((uintptr_t)block | (owned ? 0 : 1));
return (mi_thread_free_t)((uintptr_t)block | (owned ? 1 : 0));
}
// Thread id of thread that owns this page
static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
return mi_atomic_load_relaxed(&page->xthread_id);
}
// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
return mi_tf_block(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
@ -546,9 +572,27 @@ static inline bool mi_page_is_owned(const mi_page_t* page) {
return mi_tf_is_owned(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
}
// Thread id of thread that owns this page
static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
return mi_atomic_load_relaxed(&page->xthread_id);
// Unown a page that is currently owned
static inline void _mi_page_unown(mi_page_t* page) {
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_thread_id(page)==0);
const uintptr_t old = mi_atomic_and_acq_rel(&page->xthread_free, ~((uintptr_t)1));
mi_assert_internal((old&1)==1); MI_UNUSED(old);
/*
mi_thread_free_t tf_new;
mi_thread_free_t tf_old;
do {
tf_old = mi_atomic_load_relaxed(&page->xthread_free);
mi_assert_internal(mi_tf_is_owned(tf_old));
tf_new = mi_tf_create(mi_tf_block(tf_old), false);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf_old, tf_new));
*/
}
// get ownership if it is not yet owned
static inline bool mi_page_try_claim_ownership(mi_page_t* page) {
const uintptr_t old = mi_atomic_or_acq_rel(&page->xthread_free, 1);
return ((old&1)==0);
}
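A sketch of how the new ownership bit (bit 0 of `xthread_free`) is intended to be used, derived from the functions above; the surrounding logic is illustrative only:

if (mi_page_try_claim_ownership(page)) {
  // we won the race: this thread now exclusively owns the (abandoned) page
  // ... inspect it, reclaim it into a heap, or free it ...
  _mi_page_unown(page);   // release ownership again if the page is not kept
}
else {
  // another thread (e.g. a concurrent mi_free) already owns it; leave it alone
}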

View file

@ -120,7 +120,7 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#endif
#ifndef MI_BITMAP_CHUNK_BITS_SHIFT
#define MI_BITMAP_CHUNK_BITS_SHIFT 8 // optimized for 256 bits per chunk (avx2)
#define MI_BITMAP_CHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512)
#endif
#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
@ -305,8 +305,8 @@ typedef struct mi_page_s {
#endif
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
// _Atomic(uintptr_t) xheap; // heap this page belongs to.
mi_heap_t* heap; // heap this page belongs to.
struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
mi_subproc_t* subproc; // sub-process of this heap
@ -401,7 +401,7 @@ typedef struct mi_padding_s {
// A heap owns a set of pages.
struct mi_heap_s {
mi_tld_t* tld;
_Atomic(mi_block_t*) thread_delayed_free;
// _Atomic(mi_block_t*) thread_delayed_free;
mi_threadid_t thread_id; // thread this heap belongs to
mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
@ -412,6 +412,7 @@ struct mi_heap_s {
size_t page_retired_max; // largest retired index into the `pages` array.
mi_heap_t* next; // list of heaps per thread
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
bool eager_abandon; // `true` if this heap can abandon pages to reduce memory footprint
uint8_t tag; // custom tag, can be used for separating heaps based on the object types
#if MI_GUARDED
size_t guarded_size_min; // minimal size for guarded objects

View file

@ -42,13 +42,12 @@ typedef struct mi_arena_s {
bool is_large; // memory area consists of large- or huge OS pages (always committed)
mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited
_Atomic(mi_msecs_t) purge_expire; // expiration time when slices should be decommitted from `slices_decommit`.
mi_subproc_t* subproc;
mi_bitmap_t slices_free; // is the slice free?
mi_bitmap_t slices_committed; // is the slice committed? (i.e. accessible)
mi_bitmap_t slices_purge; // can the slice be purged? (slice in purge => slice in free)
mi_bitmap_t slices_dirty; // is the slice potentially non-zero?
mi_bitmap_t slices_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page)
mi_pairmap_t pages_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page)
// the full queue contains abandoned full pages
} mi_arena_t;
@ -197,7 +196,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
// set the dirty bits
if (arena->memid.initially_zero) {
memid->initially_zero = mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_dirty, slice_index, slice_count, NULL);
memid->initially_zero = mi_bitmap_setN(&arena->slices_dirty, slice_index, slice_count, NULL);
}
// set commit state
@ -206,7 +205,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
memid->initially_committed = true;
bool all_already_committed;
mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slice_count, &all_already_committed);
mi_bitmap_setN(&arena->slices_committed, slice_index, slice_count, &all_already_committed);
if (!all_already_committed) {
bool commit_zero = false;
if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero, NULL)) {
@ -219,13 +218,13 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
}
else {
// no need to commit, but check if already fully committed
memid->initially_committed = mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slice_count);
memid->initially_committed = mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count);
}
mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->slices_free, slice_index, slice_count));
if (commit) { mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slice_count)); }
mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_dirty, slice_index, slice_count));
// mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->slices_purge, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
if (commit) { mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count)); }
mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count));
// mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
return p;
}
@ -285,8 +284,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
Arena iteration
----------------------------------------------------------- */
static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, mi_subproc_t* subproc, int numa_node, bool allow_large) {
if (subproc != NULL && arena->subproc != subproc) return false;
static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, int numa_node, bool allow_large) {
if (!allow_large && arena->is_large) return false;
if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return false;
if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity
@ -298,7 +296,7 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are
#define MI_THREADS_PER_ARENA (16)
#define mi_forall_arenas(req_arena_id, subproc, allow_large, tseq, var_arena_id, var_arena) \
#define mi_forall_arenas(req_arena_id, allow_large, tseq, var_arena_id, var_arena) \
{ \
size_t _max_arena; \
size_t _start; \
@ -316,7 +314,7 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are
if (_idx >= _max_arena) { _idx -= _max_arena; } \
const mi_arena_id_t var_arena_id = mi_arena_id_create(_idx); MI_UNUSED(var_arena_id);\
mi_arena_t* const var_arena = mi_arena_from_index(_idx); \
if (var_arena != NULL && mi_arena_is_suitable(var_arena,req_arena_id,subproc,-1 /* todo: numa node */,allow_large)) \
if (var_arena != NULL && mi_arena_is_suitable(var_arena,req_arena_id,-1 /* todo: numa node */,allow_large)) \
{
#define mi_forall_arenas_end() }}}
@ -337,9 +335,8 @@ static mi_decl_noinline void* mi_arena_try_find_free(
if (alignment > MI_ARENA_SLICE_ALIGN) return NULL;
// search arenas
mi_subproc_t* const subproc = tld->subproc;
const size_t tseq = tld->tseq;
mi_forall_arenas(req_arena_id, subproc, allow_large, tseq, arena_id, arena)
mi_forall_arenas(req_arena_id, allow_large, tseq, arena_id, arena)
{
void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid);
if (p != NULL) return p;
@ -448,26 +445,40 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
// search arenas
const bool allow_large = true;
size_t tseq = tld->tseq;
mi_forall_arenas(req_arena_id, subproc, allow_large, tseq, arena_id, arena)
mi_forall_arenas(req_arena_id, allow_large, tseq, arena_id, arena)
{
size_t slice_index;
if (mi_bitmap_try_find_and_clear(&arena->slices_abandoned[bin], tseq, &slice_index)) {
mi_pairmap_t* const pairmap = &arena->pages_abandoned[bin];
while (mi_pairmap_try_find_and_set_busy(pairmap, tseq, &slice_index)) { // todo: don't restart from scratch if we fail for some entry?
// found an abandoned page of the right size
mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
// it is set busy for now so we can read safely even with concurrent mi_free reclaiming
// try to claim ownership atomically
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_dirty, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->slices_purge, slice_index, slice_count));
mi_assert_internal(mi_page_block_size(page) == block_size);
mi_assert_internal(!mi_page_is_full(page));
mi_assert_internal(mi_page_is_abandoned(page));
if (!mi_page_try_claim_ownership(page)) {
// a concurrent free already grabbed the page.
// Restore the abandoned_map to make it available again (unblocking busy waiters)
mi_pairmap_set(pairmap, slice_index);
}
else {
// we got ownership, clear the abandoned entry (unblocking busy waiters)
mi_pairmap_clear(pairmap, slice_index);
mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
_mi_page_free_collect(page, false); // update `used` count
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page);
mi_assert_internal(mi_page_block_size(page) == block_size);
mi_assert_internal(mi_page_is_abandoned(page));
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(!mi_page_is_full(page));
return page;
}
}
}
mi_forall_arenas_end();
return NULL;
}
@ -602,40 +613,99 @@ mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t block
void _mi_arena_page_free(mi_page_t* page, mi_tld_t* tld) {
void _mi_arena_page_free(mi_page_t* page) {
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(page->next==NULL);
#if MI_DEBUG>1
if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) {
size_t bin = _mi_bin(mi_page_block_size(page));
size_t slice_index;
size_t slice_count;
mi_arena_t* arena = mi_page_arena(page, &slice_index, &slice_count);
mi_assert_internal(!mi_page_is_singleton(page));
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
mi_assert_internal(mi_pairmap_is_clear(&arena->pages_abandoned[bin], slice_index));
}
#endif
_mi_page_map_unregister(page);
_mi_arena_free(page, 1, 1, page->memid, &tld->stats);
_mi_arena_free(page, 1, 1, page->memid, NULL);
}
/* -----------------------------------------------------------
Arena abandon
----------------------------------------------------------- */
void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld) {
mi_assert_internal(mi_page_is_abandoned(page));
mi_assert_internal(page->next==NULL);
void _mi_arena_page_abandon(mi_page_t* page) {
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
mi_assert_internal(!mi_page_all_free(page));
mi_assert_internal(page->next==NULL);
if (mi_page_all_free(page)) {
_mi_arena_page_free(page, tld);
}
else if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) {
mi_subproc_t* subproc = page->subproc;
if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) {
// make available for allocations
size_t bin = _mi_bin(mi_page_block_size(page));
size_t slice_index;
mi_arena_t* arena = mi_page_arena(page, &slice_index, NULL);
bool were_zero = mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_abandoned[bin], slice_index, 1, NULL);
size_t slice_count;
mi_arena_t* arena = mi_page_arena(page, &slice_index, &slice_count);
mi_assert_internal(!mi_page_is_singleton(page));
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
// mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count));
_mi_page_unown(page);
bool were_zero = mi_pairmap_set(&arena->pages_abandoned[bin], slice_index);
MI_UNUSED(were_zero); mi_assert_internal(were_zero);
mi_atomic_increment_relaxed(&tld->subproc->abandoned_count[bin]);
mi_atomic_increment_relaxed(&subproc->abandoned_count[bin]);
}
else {
// page is full (or a singleton), page is OS/externally allocated
// leave as is; it will be reclaimed when an object is free'd in the page
_mi_page_unown(page);
}
}
// called from `mi_free` when trying to unabandon an abandoned page
void _mi_arena_page_unabandon(mi_page_t* page) {
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) {
// remove from the abandoned map
size_t bin = _mi_bin(mi_page_block_size(page));
size_t slice_index;
size_t slice_count;
mi_arena_t* arena = mi_page_arena(page, &slice_index, &slice_count);
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
// this busy waits until a concurrent reader (from alloc_abandoned) is done
mi_pairmap_clear_while_not_busy(&arena->pages_abandoned[bin], slice_index);
mi_atomic_decrement_relaxed(&page->subproc->abandoned_count[bin]);
}
else {
// page is full (or a singleton), page is OS/externally allocated
// nothing to do
// TODO: maintain count of these as well?
}
}
/*
bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_is_abandoned(page)); }
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
@ -660,7 +730,7 @@ bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
// return false;
// }
const size_t bin = _mi_bin(page->block_size);
if (mi_bitmap_try_xsetN(MI_BIT_CLEAR, &arena->slices_abandoned[bin], slice_index, 1)) {
if (mi_bitmap_try_clear(&arena->slices_abandoned[bin], slice_index)) {
// we got it atomically
_mi_page_reclaim(heap, page);
mi_assert_internal(!mi_page_is_abandoned(page));
@ -668,7 +738,7 @@ bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
}
else {
if (mi_page_is_abandoned(page)) {
mi_assert(false);
// mi_assert(false);
}
}
}
@ -689,6 +759,7 @@ bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
return false;
}
*/
void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) {
MI_UNUSED(heap);
@ -704,11 +775,12 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_
static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats);
void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
mi_assert_internal(size > 0 && stats != NULL);
mi_assert_internal(size > 0);
mi_assert_internal(committed_size <= size);
if (p==NULL) return;
if (size==0) return;
const bool all_committed = (committed_size == size);
if (stats==NULL) { stats = &_mi_stats_main; }
// need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
mi_track_mem_undefined(p, size);
@ -748,7 +820,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
else {
if (!all_committed) {
// mark the entire range as no longer committed (so we recommit the full range when re-using)
mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->slices_committed, slice_index, slice_count, NULL);
mi_bitmap_clearN(&arena->slices_committed, slice_index, slice_count);
mi_track_mem_noaccess(p, size);
if (committed_size > 0) {
// if partially committed, adjust the committed stats (as it will be recommitted when re-using)
@ -764,7 +836,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
}
// and make it available to others again
bool all_inuse = mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_free, slice_index, slice_count, NULL);
bool all_inuse = mi_bitmap_setN(&arena->slices_free, slice_index, slice_count, NULL);
if (!all_inuse) {
_mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", mi_arena_slice_start(arena,slice_index), mi_size_of_slices(slice_count));
return;
@ -891,10 +963,6 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
arena->is_large = is_large;
arena->purge_expire = 0;
mi_lock_init(&arena->abandoned_visit_lock);
mi_heap_t* heap = mi_heap_get_default();
if (heap != NULL) {
arena->subproc = heap->tld->subproc;
}
// init bitmaps
mi_bitmap_init(&arena->slices_free,true);
@ -902,18 +970,18 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
mi_bitmap_init(&arena->slices_dirty,true);
mi_bitmap_init(&arena->slices_purge,true);
for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) {
mi_bitmap_init(&arena->slices_abandoned[i],true);
mi_pairmap_init(&arena->pages_abandoned[i],true);
}
// reserve our meta info (and reserve slices outside the memory area)
mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->slices_free, info_slices /* start */, arena->slice_count - info_slices);
mi_bitmap_unsafe_setN(&arena->slices_free, info_slices /* start */, arena->slice_count - info_slices);
if (memid.initially_committed) {
mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->slices_committed, 0, arena->slice_count);
mi_bitmap_unsafe_setN(&arena->slices_committed, 0, arena->slice_count);
}
else {
mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_committed, 0, info_slices, NULL);
mi_bitmap_setN(&arena->slices_committed, 0, info_slices, NULL);
}
mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_dirty, 0, info_slices, NULL);
mi_bitmap_setN(&arena->slices_dirty, 0, info_slices, NULL);
return mi_arena_add(arena, arena_id, &_mi_stats_main);
}
@ -973,10 +1041,16 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
_mi_output_message("%s%s:\n", prefix, header);
size_t bit_count = 0;
size_t bit_set_count = 0;
for (int i = 0; i < MI_BFIELD_BITS && bit_count < slice_count; i++) {
char buf[MI_BITMAP_CHUNK_BITS + 32]; _mi_memzero(buf, sizeof(buf));
for (int i = 0; i < MI_BITMAP_CHUNK_COUNT && bit_count < slice_count; i++) {
char buf[MI_BITMAP_CHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf));
mi_bitmap_chunk_t* chunk = &bitmap->chunks[i];
for (size_t j = 0, k = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) {
if (j > 0 && (j % 4) == 0) {
buf[k++] = '\n';
_mi_memcpy(buf+k, prefix, strlen(prefix)); k += strlen(prefix);
buf[k++] = ' ';
buf[k++] = ' ';
}
if (bit_count < slice_count) {
mi_bfield_t bfield = chunk->bfields[j];
if (invert) bfield = ~bfield;
@ -987,12 +1061,11 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
buf[k++] = ' ';
}
else {
_mi_memset(buf + k, ' ', MI_BFIELD_BITS);
_mi_memset(buf + k, 'o', MI_BFIELD_BITS);
k += MI_BFIELD_BITS;
}
bit_count += MI_BFIELD_BITS;
}
_mi_output_message("%s %s\n", prefix, buf);
}
_mi_output_message("%s total ('x'): %zu\n", prefix, bit_set_count);
@ -1113,7 +1186,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices,
const size_t size = mi_size_of_slices(slices);
void* const p = mi_arena_slice_start(arena, slice_index);
bool needs_recommit;
if (mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slices)) {
if (mi_bitmap_is_setN(&arena->slices_committed, slice_index, slices)) {
// all slices are committed, we can purge freely
needs_recommit = _mi_os_purge(p, size, stats);
}
@ -1128,11 +1201,11 @@ static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices,
}
// clear the purged slices
mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->slices_purge, slices, slice_index, NULL);
mi_bitmap_clearN(&arena->slices_purge, slices, slice_index);
// update committed bitmap
if (needs_recommit) {
mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->slices_committed, slices, slice_index, NULL);
mi_bitmap_clearN(&arena->slices_committed, slices, slice_index);
}
}

File diff suppressed because it is too large

View file

@ -25,20 +25,28 @@ typedef size_t mi_bfield_t;
#define MI_BFIELD_LO_BIT8 (((~(mi_bfield_t)0))/0xFF) // 0x01010101 ..
#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
#define MI_BITMAP_CHUNK_SIZE (MI_BITMAP_CHUNK_BITS / 8)
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
// 512 bits on 64-bit
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_chunk_s {
_Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;
// for now 32 (note: with ABA instructions we can make this 64)
#define MI_EPOCHSET_BITS (32)
#define MI_BITMAP_CHUNK_COUNT MI_EPOCHSET_BITS
typedef uint64_t mi_epochset_t;
typedef mi_decl_align(32) struct mi_bitmap_s {
mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
_Atomic(mi_bfield_t)any_set;
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_s {
mi_bitmap_chunk_t chunks[MI_BITMAP_CHUNK_COUNT];
_Atomic(mi_epochset_t) any_set;
} mi_bitmap_t;
#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
// 16k bits on 64bit, 8k bits on 32bit
// with 64KiB slices, this can address a 1GiB arena
#define MI_BITMAP_MAX_BITS (MI_BITMAP_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS)
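The capacity in the comment follows from the constants above; a sketch of the arithmetic, assuming `MI_SIZE_SHIFT` is 3 on a 64-bit build:

// MI_BITMAP_CHUNK_BITS = 1 << (6 + 3) = 512 bits per chunk   (1 << (6 + 2) = 256 on 32-bit)
// MI_BITMAP_MAX_BITS   = 32 * 512     = 16384 bits (16k)     (32 * 256 = 8192 on 32-bit)
// with one bit per 64 KiB slice: 16384 * 64 KiB = 1 GiB addressable per arena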
/* --------------------------------------------------------------------------------
Atomic bitmap
@ -52,29 +60,73 @@ typedef bool mi_bit_t;
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n);
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `all_already_xset` is not NULL, it is set to true if all the bits were already set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_xset);
static inline bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_set) {
return mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, n, all_already_set);
}
static inline bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_xsetN(MI_BIT_CLEAR, bitmap, idx, n, NULL);
}
// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
static inline bool mi_bitmap_is_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_is_xsetN(MI_BIT_SET, bitmap, idx, n);
}
static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
}
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
static inline bool mi_bitmap_try_set(mi_bitmap_t* bitmap, size_t idx) {
return mi_bitmap_try_xset(MI_BIT_SET, bitmap, idx);
}
static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) {
return mi_bitmap_try_xset(MI_BIT_CLEAR, bitmap, idx);
}
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
static inline bool mi_bitmap_try_set8(mi_bitmap_t* bitmap, size_t idx) {
return mi_bitmap_try_xset8(MI_BIT_SET, bitmap, idx);
}
static inline bool mi_bitmap_try_clear8(mi_bitmap_t* bitmap, size_t idx) {
return mi_bitmap_try_xset8(MI_BIT_CLEAR, bitmap, idx);
}
// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
static inline bool mi_bitmap_try_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_try_xsetN(MI_BIT_SET, bitmap, idx, n);
}
static inline bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_try_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
}
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
@ -98,21 +150,24 @@ typedef mi_bfield_t mi_pair_t;
#define MI_PAIR_CLEAR (0)
#define MI_PAIR_BUSY (1)
#define MI_PAIR_BUSYX (2)
#define MI_PAIR_UNUSED (2) // should never occur
#define MI_PAIR_SET (3)
typedef mi_decl_align(32) struct mi_pairmap_s {
mi_bitmap_chunk_t chunks[2*MI_BFIELD_BITS];
_Atomic(mi_bfield_t) any_set;
_Atomic(size_t) epoch;
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_pairmap_s {
mi_bitmap_chunk_t chunks[2*MI_BITMAP_CHUNK_COUNT];
_Atomic(mi_epochset_t) any_set;
} mi_pairmap_t;
#define MI_PAIRMAP_MAX_PAIRS (MI_BITMAP_MAX_BITS) // 16k pairs on 64bit, 8k pairs on 32bit
#define MI_PAIRMAP_MAX_BITS (2*MI_PAIRMAP_MAX_PAIRS)
mi_decl_nodiscard bool mi_pairmap_xset(mi_pair_t set, mi_pairmap_t* pairmap, size_t idx);
mi_decl_nodiscard bool mi_pairmap_xset_while_not_busy(mi_pair_t set, mi_pairmap_t* pairmap, size_t idx);
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t n, size_t tseq, size_t* pidx);
// initialize a pairmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_pairmap_init(mi_pairmap_t* pairmap, bool already_zero);
bool mi_pairmap_set(mi_pairmap_t* pairmap, size_t pair_idx);
bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx);
void mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx);
void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx);
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx);
#endif // MI_XBITMAP_H
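A sketch of how these pairmap states are used for abandoned pages elsewhere in this commit (each abandoned page occupies one 2-bit pair, indexed by its slice):

// abandon (_mi_arena_page_abandon):
//   mi_pairmap_set(&arena->pages_abandoned[bin], slice_index);      // CLEAR -> SET
// reclaim by an allocating thread (mi_arena_page_try_find_abandoned):
//   mi_pairmap_try_find_and_set_busy(pairmap, tseq, &slice_index);  // SET -> BUSY
//   mi_pairmap_clear(pairmap, slice_index);                         // got ownership: BUSY -> CLEAR
//   mi_pairmap_set(pairmap, slice_index);                           // lost the race:  BUSY -> SET
// unabandon from a concurrent mi_free (_mi_arena_page_unabandon):
//   mi_pairmap_clear_while_not_busy(pairmap, slice_index);          // waits out BUSY, then -> CLEAR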

View file

@ -158,52 +158,64 @@ static void mi_decl_noinline mi_free_try_reclaim_mt(mi_page_t* page) {
mi_assert_internal(mi_page_thread_id(page)==0);
// we own the page now..
// first remove it from the abandoned pages in the arena
mi_heap_t* const heap = mi_heap_get_default();
_mi_arena_page_unabandon(page,heap->tld);
// collect the thread atomic free list
_mi_page_free_collect(page, false); // update `used` count
if (mi_page_is_singleton(page)) mi_assert_internal(mi_page_all_free(page));
if (mi_page_all_free(page)) {
// first remove it from the abandoned pages in the arena -- this waits for any readers to finish
_mi_arena_page_unabandon(page);
// we can free the page directly
_mi_arena_page_free(page, heap->tld);
_mi_arena_page_free(page);
return;
}
else {
// the page still has some blocks in use
// reclaim in our heap if compatible, or otherwise abandon again
if ((_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0) &&
(mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) && // we did not already terminate our thread (can this happen? yes, due to thread-local destructors for example (issue #944))
(page->subproc == heap->tld->subproc) && // don't reclaim across sub-processes
mi_arena_page_try_reclaim(page) // and we can reclaim it from the arena
// todo: optimize this check further?
// note: don't use `mi_heap_get_default()` as we may just have terminated this thread and we should
// not reinitialize the heap for this thread. (can happen due to thread-local destructors for example -- issue #944)
mi_heap_t* const heap = mi_prim_get_default_heap();
if ((_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0) && // only if reclaim on free is allowed
(heap != (mi_heap_t*)&_mi_heap_empty)) // we did not already terminate our thread (can this happen?
{
mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag);
if ((tagheap != NULL) && // don't reclaim across heap object types
(page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
(_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arenas; todo: inline arena_is_suitable (?)
)
{
// make it part of our heap
_mi_heap_page_reclaim(heap, page);
// first remove it from the abandoned pages in the arena -- this waits for any readers to finish
_mi_arena_page_unabandon(page);
// and make it part of our heap
_mi_heap_page_reclaim(tagheap, page);
return;
}
else {
// abandon again
_mi_arena_page_abandon(page, heap->tld);
}
// give up ownership as we cannot reclaim this page
// note: we don't need to re-abandon as we did not yet unabandon
_mi_page_unown(page);
}
}
// Push a block that is owned by another thread on its page-local thread free list.
static void mi_decl_noinline mi_free_block_delayed_mt(mi_page_t* page, mi_block_t* block)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// push atomically on the page thread free list
mi_thread_free_t tf_new;
mi_thread_free_t tf;
mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
do {
tf = mi_atomic_load_relaxed(&page->xthread_free);
mi_block_set_next(page, block, mi_tf_block(tf));
mi_block_set_next(page, block, mi_tf_block(tf_old));
tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf, tf_new));
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));
// and atomically reclaim the page if it was abandoned
bool reclaimed = !mi_tf_is_owned(tf);
if (reclaimed) mi_free_try_reclaim_mt(page);
bool reclaimed = !mi_tf_is_owned(tf_old);
if (reclaimed) {
mi_free_try_reclaim_mt(page);
}
}
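Why this yields exactly one reclaimer (reasoning sketch, not part of the diff):

// every push above CASes the owned bit (bit 0) to 1 and observes the previous value in tf_old;
// on an abandoned page that bit starts out 0, so exactly one freeing thread sees
// (tf_old & 1) == 0 and thereby becomes the owner -- only that thread runs
// mi_free_try_reclaim_mt, which then frees, reclaims, or unowns the page again.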
/*
@ -266,9 +278,9 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block
}
else {
if (mi_page_is_abandoned(page)) {
mi_assert(false);
// mi_assert(false);
}
mi_assert_internal(!mi_page_is_singleton(page)); // we should have succeeded on singleton pages
// mi_assert_internal(!mi_page_is_singleton(page)); // we should have succeeded on singleton pages
}
}
}

View file

@ -93,7 +93,7 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
if (mi_page_all_free(page)) {
// no more used blocks, free the page.
// note: this will free retired pages as well.
_mi_page_free(page, pq, collect >= MI_FORCE);
_mi_page_free(page, pq);
}
else if (collect == MI_ABANDON) {
// still used blocks but the thread is done; abandon the page
@ -102,14 +102,14 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
return true; // don't break
}
static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(heap);
MI_UNUSED(pq);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
return true; // don't break
}
//static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
// MI_UNUSED(arg1);
// MI_UNUSED(arg2);
// MI_UNUSED(heap);
// MI_UNUSED(pq);
// _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// return true; // don't break
//}
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
@ -137,20 +137,20 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
}
// if abandoning, mark all pages to no longer add to delayed_free
if (collect == MI_ABANDON) {
mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
}
//if (collect == MI_ABANDON) {
// mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
//}
// free all current thread delayed blocks.
// (if abandoning, after this there are no more thread-delayed references into the pages.)
_mi_heap_delayed_free_all(heap);
// _mi_heap_delayed_free_all(heap);
// collect retired pages
_mi_heap_collect_retired(heap, force);
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
// mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
// collect segments (purge pages, this can be expensive so don't force on abandonment)
// _mi_segments_collect(collect == MI_FORCE, &heap->tld->segments);
@ -206,6 +206,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool
heap->thread_id = _mi_thread_id();
heap->arena_id = arena_id;
heap->no_reclaim = noreclaim;
heap->eager_abandon = (!noreclaim && mi_option_is_enabled(mi_option_eager_abandon));
heap->tag = tag;
if (heap == tld->heap_backing) {
_mi_random_init(&heap->random);
@ -255,7 +256,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
// TODO: copy full empty heap instead?
_mi_memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
heap->thread_delayed_free = NULL;
// heap->thread_delayed_free = NULL;
heap->page_count = 0;
}
@ -314,7 +315,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
MI_UNUSED(pq);
// ensure no more thread_delayed_free will be added
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
//_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// stats
const size_t bsize = mi_page_block_size(page);
@ -341,7 +342,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
// mi_page_free(page,false);
page->next = NULL;
page->prev = NULL;
_mi_arena_page_free(page,heap->tld);
_mi_arena_page_free(page);
return true; // keep going
}
@ -413,7 +414,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
if (from==NULL || from->page_count == 0) return;
// reduce the size of the delayed frees
_mi_heap_delayed_free_partial(from);
// _mi_heap_delayed_free_partial(from);
// transfer all pages by appending the queues; this will set a new heap field
// so threads may do delayed frees in either heap for a while.
@ -432,10 +433,10 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
// note: be careful here as the `heap` field in all those pages no longer points to `from`,
// turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls
// the regular `_mi_free_delayed_block` which is safe.
_mi_heap_delayed_free_all(from);
#if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL);
#endif
//_mi_heap_delayed_free_all(from);
//#if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353
// mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL);
//#endif
// and reset the `from` heap
mi_heap_reset_pages(from);

View file

@ -31,8 +31,9 @@ const mi_page_t _mi_page_empty = {
{ 0, 0 },
#endif
MI_ATOMIC_VAR_INIT(0), // xthread_free
MI_ATOMIC_VAR_INIT(0), // xheap
NULL, // xheap
NULL, NULL, // next, prev
NULL, // subproc
{ {{ NULL, 0}}, false, false, false, MI_MEM_NONE } // memid
};
@ -96,7 +97,7 @@ const mi_page_t _mi_page_empty = {
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
NULL,
MI_ATOMIC_VAR_INIT(NULL), // thread delayed free
// MI_ATOMIC_VAR_INIT(NULL), // thread delayed free
0, // thread_id
0, // arena_id
0, // cookie
@ -106,6 +107,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
MI_BIN_FULL, 0, // page retired min/max
NULL, // next
false, // can reclaim
true, // can eager abandon
0, // tag
#if MI_GUARDED
0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
@ -138,7 +140,7 @@ static mi_decl_cache_align mi_tld_t tld_main = {
mi_decl_cache_align mi_heap_t _mi_heap_main = {
&tld_main,
MI_ATOMIC_VAR_INIT(NULL),
// MI_ATOMIC_VAR_INIT(NULL), // thread delayed free list
0, // thread id
0, // initial cookie
0, // arena id
@ -148,6 +150,7 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = {
MI_BIN_FULL, 0, // page retired min/max
NULL, // next heap
false, // can reclaim
true, // eager abandon
0, // tag
#if MI_GUARDED
0, 0, 0, 0, 0,

View file

@ -280,7 +280,7 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
// generic trailing and leading zero count
// --------------------------------------------------------
static inline size_t mi_ctz_generic32(uint32_t x) {
uint32_t _mi_ctz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
@ -290,7 +290,7 @@ static inline size_t mi_ctz_generic32(uint32_t x) {
return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
}
static inline size_t mi_clz_generic32(uint32_t x) {
static size_t mi_clz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
@ -319,10 +319,10 @@ size_t _mi_clz_generic(size_t x) {
size_t _mi_ctz_generic(size_t x) {
if (x==0) return MI_SIZE_BITS;
#if (MI_SIZE_BITS <= 32)
return mi_ctz_generic32((uint32_t)x);
return _mi_ctz_generic32((uint32_t)x);
#else
const size_t count = mi_ctz_generic32((uint32_t)x);
const size_t count = _mi_ctz_generic32((uint32_t)x);
if (count < 32) return count;
return (32 + mi_ctz_generic32((uint32_t)(x>>32)));
return (32 + _mi_ctz_generic32((uint32_t)(x>>32)));
#endif
}
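A brief worked example of the de Bruijn step in `_mi_ctz_generic32` (sketch; constant and table as in the code above):

// x & -x isolates the lowest set bit, i.e. yields 2^k where k = ctz(x); multiplying the
// de Bruijn constant by 2^k is a left shift by k, so the top 5 bits (>> 27) are unique per k.
// e.g. x = 0x28 (0b101000): x & -x = 8 = 2^3; (0x077CB531u * 8) >> 27 = 7; debruijn[7] = 3 = ctz(x).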

View file

@ -158,6 +158,7 @@ static mi_option_desc_t options[_mi_option_last] =
UNINIT, MI_OPTION(guarded_sample_rate)}, // 1 out of N allocations in the min/max range will be guarded (=4000)
{ 0, UNINIT, MI_OPTION(guarded_sample_seed)},
{ 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable.
{ 1, UNINIT, MI_OPTION(eager_abandon) },
};
static void mi_option_init(mi_option_desc_t* desc);
@ -412,7 +413,7 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
// Define our own limited `fprintf` that avoids memory allocation.
// We do this using `_mi_vsnprintf` with a limited buffer.
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
char buf[512];
char buf[768];
if (fmt==NULL) return;
if (!mi_recurse_enter()) return;
_mi_vsnprintf(buf, sizeof(buf)-1, fmt, args);

View file

@ -357,6 +357,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
// set append pages to new heap and count
size_t count = 0;
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
/*
// inline `mi_page_set_heap` to avoid wrong assertion during absorption;
// in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive.
mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
@ -364,6 +365,8 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
// side effect that it spins until any DELAYED_FREEING is finished. This ensures
// that after appending only the new heap will be used for delayed free operations.
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false);
*/
mi_page_set_heap(page, heap);
count++;
}

View file

@ -119,19 +119,20 @@ bool _mi_page_is_valid(mi_page_t* page) {
#if MI_SECURE
mi_assert_internal(page->keys[0] != 0);
#endif
if (mi_page_heap(page)!=NULL) {
mi_assert_internal(!_mi_process_is_initialized || mi_page_thread_id(page) == mi_page_heap(page)->thread_id || mi_page_thread_id(page)==0);
if (!mi_page_is_abandoned(page)) {
mi_assert_internal(!_mi_process_is_initialized);
{
mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_is_huge(page) || mi_page_is_in_full(page));
mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
// mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
}
}
return true;
}
#endif
/*
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
while (!_mi_page_try_use_delayed_free(page, delay, override_never)) {
mi_atomic_yield();
@ -164,15 +165,13 @@ bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool ove
return true; // success
}
*/
/* -----------------------------------------------------------
Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */
// Collect the local `thread_free` list using an atomic exchange.
// Note: The exchange must be done atomically as this is used right after
// moving to the full list in `mi_page_collect_ex` and we need to
// ensure that there was no race where the page became unfull just before the move.
static void _mi_page_thread_free_collect(mi_page_t* page)
{
mi_block_t* head;
@ -180,11 +179,10 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
head = mi_tf_block(tfree);
tfreex = mi_tf_set_block(tfree,NULL);
if (head == NULL) return; // return if the list is empty
tfreex = mi_tf_create(NULL,mi_tf_is_owned(tfree)); // set the thread free list to NULL
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));
// return if the list is empty
if (head == NULL) return;
mi_assert_internal(head != NULL);
// find the tail -- also to get a proper count (without data races)
size_t max_count = page->capacity; // cannot collect more than capacity
@ -213,9 +211,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
mi_assert_internal(page!=NULL);
// collect the thread free list
if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation
_mi_page_thread_free_collect(page);
}
// and the local free list
if (page->local_free != NULL) {
@ -248,22 +244,50 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
Page fresh and retire
----------------------------------------------------------- */
/*
// called from segments when reclaiming abandoned pages
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_page_set_heap(page, heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
// mi_page_set_heap(page, heap);
// _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
mi_assert_expensive(mi_page_is_valid_init(page));
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
// mi_assert_internal(mi_page_heap(page) == heap);
// mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
// TODO: push on full queue immediately if it is full?
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
}
*/
// called from `mi_free` on a reclaim, and from fresh_alloc if we get an abandoned page
void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page)
{
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
mi_page_set_heap(page,heap);
_mi_page_free_collect(page, false); // ensure used count is up to date
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
}
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
_mi_page_free(page, pq);
}
else {
mi_page_queue_remove(pq, page);
mi_page_set_heap(page, NULL);
_mi_arena_page_abandon(page);
}
}
// allocate a fresh page from a segment
@ -279,7 +303,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
return NULL;
}
if (mi_page_is_abandoned(page)) {
_mi_page_reclaim(heap, page);
_mi_heap_page_reclaim(heap, page);
if (!mi_page_immediate_available(page)) {
if (mi_page_is_expandable(page)) {
mi_page_extend_free(heap, page);
@ -313,6 +337,7 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
Do any delayed frees
(put there by other threads if they deallocated in a full page)
----------------------------------------------------------- */
/*
void _mi_heap_delayed_free_all(mi_heap_t* heap) {
while (!_mi_heap_delayed_free_partial(heap)) {
mi_atomic_yield();
@ -323,7 +348,7 @@ void _mi_heap_delayed_free_all(mi_heap_t* heap) {
bool _mi_heap_delayed_free_partial(mi_heap_t* heap) {
// take over the list (note: no atomic exchange since it is often NULL)
mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ };
while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { };
bool all_freed = true;
// and free them all
@ -344,16 +369,18 @@ bool _mi_heap_delayed_free_partial(mi_heap_t* heap) {
}
return all_freed;
}
*/
/* -----------------------------------------------------------
Unfull, abandon, free and retire
----------------------------------------------------------- */
// Move a page from the full list back to a regular list
// Move a page from the full list back to a regular list (called from thread-local mi_free)
void _mi_page_unfull(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(mi_page_is_in_full(page));
mi_assert_internal(!mi_page_heap(page)->eager_abandon);
if (!mi_page_is_in_full(page)) return;
mi_heap_t* heap = mi_page_heap(page);
@ -369,12 +396,20 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(!mi_page_immediate_available(page));
mi_assert_internal(!mi_page_is_in_full(page));
mi_heap_t* heap = mi_page_heap(page);
if (heap->eager_abandon) {
// abandon full pages
_mi_page_abandon(page, pq);
}
else {
// put full pages in a heap local queue
if (mi_page_is_in_full(page)) return;
mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
_mi_page_free_collect(page, false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
}
}
/*
// Abandon a page with used blocks at the end of a thread.
// Note: only call if it is ensured that no references exist from
// the `page->heap->thread_delayed_free` into this page.
@ -426,17 +461,15 @@ void _mi_page_force_abandon(mi_page_t* page) {
_mi_page_abandon(page, pq);
}
}
*/
// Free a page with no more free blocks
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
MI_UNUSED(force);
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
mi_heap_t* pheap = mi_page_heap(page);
// mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
// no more aligned blocks in here
mi_page_set_has_aligned(page, false);
@ -447,7 +480,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
// and free it
mi_page_set_heap(page,NULL);
_mi_arena_page_free(page, pheap->tld);
_mi_arena_page_free(page);
}
#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE
@ -490,7 +523,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
}
}
#endif
_mi_page_free(page, pq, false);
_mi_page_free(page, pq);
}
// free retired pages: we don't need to look at the entire queues
@ -505,7 +538,7 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
if (mi_page_all_free(page)) {
page->retire_expire--;
if (force || page->retire_expire == 0) {
_mi_page_free(pq->first, pq, force);
_mi_page_free(pq->first, pq);
}
else {
// keep retired, update min/max
@ -681,6 +714,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
mi_assert(page != NULL);
mi_page_set_heap(page, heap);
page->subproc = heap->tld->subproc;
size_t page_size;
uint8_t* page_start = mi_page_area(page, &page_size); MI_UNUSED(page_start);
mi_track_mem_noaccess(page_start,page_size);
@ -741,7 +775,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
while (page != NULL)
{
mi_page_t* next = page->next; // remember next
mi_page_t* next = page->next; // remember next (as this page can move to another queue)
#if MI_STAT
count++;
#endif
@ -772,6 +806,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
candidate_count = 0;
}
else if (/* !mi_page_is_expandable(page) && */ page->used >= page_candidate->used) {
if (mi_page_all_free(page_candidate)) { _mi_page_free(page_candidate, pq); }
page_candidate = page;
}
// if we find a non-expandable candidate, or searched for N pages, return with the best candidate
@ -963,7 +998,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
_mi_deferred_free(heap, false);
// free delayed frees from other threads (but skip contended ones)
_mi_heap_delayed_free_partial(heap);
// _mi_heap_delayed_free_partial(heap);
// find (or allocate) a page of the right size
mi_page_t* page = mi_find_page(heap, size, huge_alignment);