mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-07-06 19:38:41 +03:00
wip: initial version with eager abandonment
commit d96c134566
14 changed files with 1127 additions and 419 deletions
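
The excerpt below covers the core of the change: a new `mi_option_eager_abandon` option, 32-bit variants of the portable bit-scan primitives, a reworked internal arena/page API (`_mi_arena_page_free`, `_mi_arena_page_abandon`, `_mi_arena_page_unabandon`), and updated page/heap structs. Pages now store their owning heap directly and track ownership in bit 0 of the atomic `xthread_free` word, so a heap can eagerly give up suitable pages and another thread can later claim them with a single atomic operation.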
@@ -379,6 +379,7 @@ typedef enum mi_option_e {
   mi_option_guarded_sample_rate,        // 1 out of N allocations in the min/max range will be guarded (=1000)
   mi_option_guarded_sample_seed,        // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0)
   mi_option_target_segments_per_thread, // experimental (=0)
+  mi_option_eager_abandon,              // eagerly abandon pages from the heap if suitable (to reduce memory footprint in multi-threaded code)
   _mi_option_last,
   // legacy option names
   mi_option_large_os_pages = mi_option_allow_large_os_pages,
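
For context on how the new option would be used: mimalloc options can be set programmatically before first use, or via the environment. A minimal sketch; `mi_option_enable` is the existing public API, while the environment-variable spelling below just follows mimalloc's usual `MIMALLOC_<OPTION>` convention and is an assumption:

  #include <mimalloc.h>

  int main(void) {
    // enable eager abandonment before worker threads start allocating
    mi_option_enable(mi_option_eager_abandon);
    // ... multi-threaded workload ...
    return 0;
  }

  // without recompiling (hypothetical spelling):
  //   MIMALLOC_EAGER_ABANDON=1 ./myprogram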
@@ -145,20 +145,13 @@ typedef int32_t mi_ssize_t;

 size_t _mi_clz_generic(size_t x);
 size_t _mi_ctz_generic(size_t x);
+uint32_t _mi_ctz_generic32(uint32_t x);

 static inline size_t mi_ctz(size_t x) {
-  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
+  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)  // on x64 tzcnt is defined for 0
     uint64_t r;
     __asm volatile ("tzcnt\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
     return r;
-  #elif defined(__GNUC__) && MI_ARCH_ARM64
-    uint64_t r;
-    __asm volatile ("rbit\t%0, %1\n\tclz\t%0, %0" : "=&r"(r) : "r"(x) : "cc");
-    return r;
-  #elif defined(__GNUC__) && MI_ARCH_RISCV
-    size_t r;
-    __asm volatile ("ctz\t%0, %1" : "=&r"(r) : "r"(x) : );
-    return r;
   #elif MI_ARCH_X64 && defined(__BMI1__)
     return (size_t)_tzcnt_u64(x);
   #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
@@ -168,6 +161,17 @@ static inline size_t mi_ctz(size_t x) {
     #else
       return (_BitScanForward64(&idx, x) ? (size_t)idx : 64);
     #endif
+  /*
+  // for arm64 and riscv, the builtin_ctz is defined for 0 as well
+  #elif defined(__GNUC__) && MI_ARCH_ARM64
+    uint64_t r;
+    __asm volatile ("rbit\t%0, %1\n\tclz\t%0, %0" : "=&r"(r) : "r"(x) : "cc");
+    return r;
+  #elif defined(__GNUC__) && MI_ARCH_RISCV
+    size_t r;
+    __asm volatile ("ctz\t%0, %1" : "=&r"(r) : "r"(x) : );
+    return r;
+  */
   #elif mi_has_builtin_size(ctz)
     return (x!=0 ? (size_t)mi_builtin_size(ctz)(x) : MI_SIZE_BITS);
   #else
@@ -177,18 +181,10 @@ static inline size_t mi_ctz(size_t x) {
 }

 static inline size_t mi_clz(size_t x) {
-  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
+  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)  // on x64 lzcnt is defined for 0
     uint64_t r;
     __asm volatile ("lzcnt\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
     return r;
-  #elif defined(__GNUC__) && MI_ARCH_ARM64
-    uint64_t r;
-    __asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : "cc");
-    return r;
-  #elif defined(__GNUC__) && MI_ARCH_RISCV
-    size_t r;
-    __asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : );
-    return r;
   #elif MI_ARCH_X64 && defined(__BMI1__)
     return (size_t)_lzcnt_u64(x);
   #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
@@ -198,6 +194,17 @@ static inline size_t mi_clz(size_t x) {
     #else
       return (_BitScanReverse64(&idx, x) ? 63 - (size_t)idx : 64);
     #endif
+  /*
+  // for arm64 and riscv, the builtin_clz is defined for 0 as well
+  #elif defined(__GNUC__) && MI_ARCH_ARM64
+    uint64_t r;
+    __asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : "cc");
+    return r;
+  #elif defined(__GNUC__) && MI_ARCH_RISCV
+    size_t r;
+    __asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : );
+    return r;
+  */
   #elif mi_has_builtin_size(clz)
     return (x!=0 ? (size_t)mi_builtin_size(clz)(x) : MI_SIZE_BITS);
   #else
@@ -206,6 +213,26 @@ static inline size_t mi_clz(size_t x) {
   #endif
 }

+static inline uint32_t mi_ctz32(uint32_t x) {
+  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)  // on x64 tzcnt is defined for 0
+    uint32_t r;
+    __asm volatile ("tzcntl\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
+    return r;
+  #elif MI_ARCH_X64 && defined(__BMI1__)
+    return (uint32_t)_tzcnt_u32(x);
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+    unsigned long idx;
+    return (_BitScanForward(&idx, x) ? (uint32_t)idx : 32);
+  #elif mi_has_builtin(ctz) && (INT_MAX == INT32_MAX)
+    return (x!=0 ? (uint32_t)mi_builtin(ctz)(x) : 32);
+  #elif mi_has_builtin(ctzl) && (LONG_MAX == INT32_MAX)
+    return (x!=0 ? (uint32_t)mi_builtin(ctzl)(x) : 32);
+  #else
+    #define MI_HAS_FAST_BITSCAN 0
+    return _mi_ctz_generic32(x);
+  #endif
+}
+
 #ifndef MI_HAS_FAST_BITSCAN
 #define MI_HAS_FAST_BITSCAN 1
 #endif
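
`_mi_ctz_generic` and `_mi_ctz_generic32` are only declared in this header; their definitions live elsewhere in the library and are not part of this excerpt. Portable fallbacks of this kind are commonly written as a de Bruijn multiplication; a self-contained sketch of the 32-bit case under that assumption (not necessarily mimalloc's actual code):

  #include <stdint.h>

  // count trailing zeros of a 32-bit value without compiler intrinsics;
  // returns 32 for x == 0, mirroring the contract of the branches above
  static uint32_t ctz32_generic(uint32_t x) {
    static const uint8_t debruijn[32] = {
      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
    };
    if (x == 0) return 32;
    // isolate the lowest set bit, then hash it into a 5-bit table index
    return debruijn[((x & (0 - x)) * 0x077CB531u) >> 27];
  }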
@@ -229,6 +256,22 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
   #endif
 }

+// Bit scan forward: find the least significant bit that is set (i.e. count trailing zeros)
+// return false if `x==0` (with `*idx` undefined) and true otherwise,
+// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
+static inline bool mi_bsf32(uint32_t x, uint32_t* idx) {
+  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
+    // on x64 the carry flag is set on zero which gives better codegen
+    bool is_zero;
+    __asm ("tzcntl\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
+    return !is_zero;
+  #else
+    *idx = mi_ctz32(x);
+    return (x!=0);
+  #endif
+}
+
 // Bit scan reverse: find the most significant bit that is set
 // return false if `x==0` (with `*idx` undefined) and true otherwise,
 // with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
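
`mi_bsf32` supports the usual scan-and-clear loop over a 32-bit mask, visiting set bits from least to most significant. An illustrative caller, assuming the definitions above are in scope (the helper name is made up):

  #include <stdint.h>
  #include <stdio.h>

  static void visit_set_bits(uint32_t mask) {
    uint32_t idx;
    while (mi_bsf32(mask, &idx)) {  // false once mask == 0 (*idx undefined then)
      printf("bit %u is set\n", idx);
      mask &= (mask - 1);           // clear the lowest set bit
    }
  }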
@@ -248,29 +291,6 @@ static inline bool mi_bsr(size_t x, size_t* idx) {
 }

-/* --------------------------------------------------------------------------------
-  find least/most significant bit position
--------------------------------------------------------------------------------- */
-
-// Find most significant bit index, or MI_SIZE_BITS if 0
-static inline size_t mi_find_msb(size_t x) {
-  #if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-    unsigned long i;
-    #if MI_SIZE_BITS==32
-      return (_BitScanReverse(&i, x) ? i : 32);
-    #else
-      return (_BitScanReverse64(&i, x) ? i : 64);
-    #endif
-  #else
-    return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
-  #endif
-}
-
-// Find least significant bit index, or MI_SIZE_BITS if 0 (this equals `mi_ctz`, count trailing zero's)
-static inline size_t mi_find_lsb(size_t x) {
-  return mi_ctz(x);
-}
-
 /* --------------------------------------------------------------------------------
   rotate
 -------------------------------------------------------------------------------- */
@@ -288,13 +308,26 @@ static inline size_t mi_rotr(size_t x, size_t r) {
     return _rotr64(x,(int)r);
   #endif
 #else
-  // The term `(-rshift)&(MI_BFIELD_BITS-1)` is written instead of `MI_BFIELD_BITS - rshift` to
+  // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
   // avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
   const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
   return ((x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1))));
 #endif
 }

+static inline uint32_t mi_rotr32(uint32_t x, uint32_t r) {
+  #if mi_has_builtin(rotateright32)
+    return mi_builtin(rotateright32)(x, r);
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+    return _lrotr(x, (int)r);
+  #else
+    // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
+    // avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
+    const unsigned int rshift = (unsigned int)(r) & 31;
+    return ((x >> rshift) | (x << ((-rshift) & 31)));
+  #endif
+}
+
 static inline size_t mi_rotl(size_t x, size_t r) {
 #if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64)
   return mi_builtin(rotateleft64)(x,r);
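
The masked-shift comment is worth spelling out: a naive rotate computes `x << (32 - r)`, which for `r == 0` shifts a 32-bit value by 32, undefined behavior in C. Since `(-r) & 31` equals `32 - r` for `1 <= r <= 31` and collapses to 0 when `r == 0`, both shift counts stay in range. A standalone comparison (the function names are made up):

  #include <stdint.h>

  // naive rotate-right: undefined behavior when r == 0 (over-shift by 32)
  static uint32_t rotr32_naive(uint32_t x, uint32_t r) {
    return (x >> r) | (x << (32 - r));
  }

  // masked form as used above: both shift counts are always in 0..31
  static uint32_t rotr32_safe(uint32_t x, uint32_t r) {
    const uint32_t s = r & 31;
    return (x >> s) | (x << ((0 - s) & 31));
  }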
@@ -307,7 +340,7 @@ static inline size_t mi_rotl(size_t x, size_t r) {
     return _rotl64(x,(int)r);
   #endif
 #else
-  // The term `(-rshift)&(MI_BFIELD_BITS-1)` is written instead of `MI_BFIELD_BITS - rshift` to
+  // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
   // avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
   const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
   return ((x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1))));
@@ -141,8 +141,10 @@ void _mi_arenas_collect(bool force_purge, mi_stats_t* stats);
 void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);

 mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment);
-void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld);
-void _mi_arena_page_free(mi_page_t* page, mi_tld_t* tld);
+void _mi_arena_page_free(mi_page_t* page);
+void _mi_arena_page_abandon(mi_page_t* page);
+void _mi_arena_page_unabandon(mi_page_t* page);

 bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page);
 void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap);
@@ -174,19 +176,19 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t hu

 void _mi_page_retire(mi_page_t* page) mi_attr_noexcept;      // free the page if there are no other pages with many free blocks
 void _mi_page_unfull(mi_page_t* page);
-void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force);  // free the page
+void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq);    // free the page
 void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
+void _mi_page_force_abandon(mi_page_t* page);

-void _mi_heap_delayed_free_all(mi_heap_t* heap);
-bool _mi_heap_delayed_free_partial(mi_heap_t* heap);
+// void _mi_heap_delayed_free_all(mi_heap_t* heap);
+// bool _mi_heap_delayed_free_partial(mi_heap_t* heap);
 void _mi_heap_collect_retired(mi_heap_t* heap, bool force);

 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
 void _mi_deferred_free(mi_heap_t* heap, bool force);

 void _mi_page_free_collect(mi_page_t* page, bool force);
-void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page);     // callback from segments
+// void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page);  // callback from segments
 void _mi_page_init(mi_heap_t* heap, mi_page_t* page);

 size_t _mi_bin_size(uint8_t bin);   // for stats
@@ -202,6 +204,7 @@ void _mi_heap_unsafe_destroy_all(void);
 mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag);
 void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page);
 bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg);
+void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page);

 // "stats.c"
 void _mi_stats_done(mi_stats_t* stats);
@@ -511,6 +514,24 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
   return mi_page_block_size(page) - MI_PADDING_SIZE;
 }

+static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
+  return page->heap;
+}
+
+static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
+  if (heap != NULL) {
+    // mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
+    page->heap = heap;
+    page->heap_tag = heap->tag;
+    mi_atomic_store_release(&page->xthread_id, heap->thread_id);
+  }
+  else {
+    // mi_atomic_store_release(&page->xheap, (uintptr_t)heap->tld->subproc);
+    page->heap = NULL;
+    mi_atomic_store_release(&page->xthread_id, 0);
+  }
+}
+
 //static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
 //  mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
 //  if (heap != NULL) {
@@ -529,13 +550,18 @@ static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
   return (mi_block_t*)(tf & ~1);
 }
 static inline bool mi_tf_is_owned(mi_thread_free_t tf) {
-  return ((tf & 1) == 0);
+  return ((tf & 1) == 1);
 }
 static inline mi_thread_free_t mi_tf_create(mi_block_t* block, bool owned) {
-  return (mi_thread_free_t)((uintptr_t)block | (owned ? 0 : 1));
+  return (mi_thread_free_t)((uintptr_t)block | (owned ? 1 : 0));
 }

+// Thread id of thread that owns this page
+static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
+  return mi_atomic_load_relaxed(&page->xthread_id);
+}
+
 // Thread free access
 static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
   return mi_tf_block(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
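
Note the encoding flip in this hunk: previously bit 0 clear meant owned, now bit 0 set means owned, which is what lets `mi_page_try_claim_ownership` further down claim a page with a single atomic OR. The flag can live in bit 0 because block pointers are aligned, so that bit is always zero in the pointer itself. A self-contained sketch of the tagging scheme (illustrative stand-ins, not the mimalloc types):

  #include <assert.h>
  #include <stdint.h>

  typedef uintptr_t thread_free_t;  // stand-in for mi_thread_free_t

  static thread_free_t tf_create(void* block, int owned) {
    assert(((uintptr_t)block & 1) == 0);  // alignment keeps bit 0 free for the flag
    return (thread_free_t)((uintptr_t)block | (owned ? 1 : 0));
  }
  static void* tf_block(thread_free_t tf)    { return (void*)(tf & ~(uintptr_t)1); }
  static int   tf_is_owned(thread_free_t tf) { return (int)(tf & 1); }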
@@ -546,9 +572,27 @@ static inline bool mi_page_is_owned(const mi_page_t* page) {
   return mi_tf_is_owned(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
 }

-// Thread id of thread that owns this page
-static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
-  return mi_atomic_load_relaxed(&page->xthread_id);
-}
+// Unown a page that is currently owned
+static inline void _mi_page_unown(mi_page_t* page) {
+  mi_assert_internal(mi_page_is_owned(page));
+  mi_assert_internal(mi_page_thread_id(page)==0);
+  const uintptr_t old = mi_atomic_and_acq_rel(&page->xthread_free, ~((uintptr_t)1));
+  mi_assert_internal((old&1)==1); MI_UNUSED(old);
+  /*
+  mi_thread_free_t tf_new;
+  mi_thread_free_t tf_old;
+  do {
+    tf_old = mi_atomic_load_relaxed(&page->xthread_free);
+    mi_assert_internal(mi_tf_is_owned(tf_old));
+    tf_new = mi_tf_create(mi_tf_block(tf_old), false);
+  } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf_old, tf_new));
+  */
+}
+
+// get ownership if it is not yet owned
+static inline bool mi_page_try_claim_ownership(mi_page_t* page) {
+  const uintptr_t old = mi_atomic_or_acq_rel(&page->xthread_free, 1);
+  return ((old&1)==0);
+}
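
Because the ownership flag occupies bit 0 while the deferred-free list pointer occupies the remaining bits, claim and release are single atomic read-modify-write operations: fetch-OR returns the previous word, so exactly one racing claimer sees the bit clear and wins, and fetch-AND clears the flag without disturbing blocks concurrently pushed onto the list. A hedged sketch in plain C11 atomics (stand-ins for the mimalloc macros):

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdint.h>

  static _Atomic(uintptr_t) xthread_free;  // illustrative page field

  // exactly one of any number of concurrent callers gets true
  static bool try_claim(void) {
    uintptr_t old = atomic_fetch_or_explicit(&xthread_free, 1, memory_order_acq_rel);
    return ((old & 1) == 0);
  }

  static void unown(void) {
    // clears only bit 0; the block-list bits are left to concurrent pushers
    atomic_fetch_and_explicit(&xthread_free, ~(uintptr_t)1, memory_order_acq_rel);
  }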
@@ -120,7 +120,7 @@ terms of the MIT license. A copy of the license can be found in the file
   #endif
 #endif
 #ifndef MI_BITMAP_CHUNK_BITS_SHIFT
-#define MI_BITMAP_CHUNK_BITS_SHIFT  8                    // optimized for 256 bits per chunk (avx2)
+#define MI_BITMAP_CHUNK_BITS_SHIFT  (6 + MI_SIZE_SHIFT)  // optimized for 512 bits per chunk (avx512)
 #endif

 #define MI_BITMAP_CHUNK_BITS  (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
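
With `MI_SIZE_SHIFT` equal to 3 on 64-bit targets, the shift works out to 6 + 3 = 9, so `MI_BITMAP_CHUNK_BITS` becomes 1 << 9 = 512 bits per chunk (eight 64-bit words, one avx512 register); on 32-bit targets (`MI_SIZE_SHIFT` of 2) it yields 256 bits, matching the old avx2-oriented constant.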
@@ -305,8 +305,8 @@ typedef struct mi_page_s {
   #endif

   _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
-  // _Atomic(uintptr_t) xheap;             // heap this threads belong to.
+  mi_heap_t* heap;                         // heap this page belongs to
   struct mi_page_s* next;                  // next page owned by the heap with the same `block_size`
   struct mi_page_s* prev;                  // previous page owned by the heap with the same `block_size`
   mi_subproc_t* subproc;                   // sub-process of this heap
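
Making `heap` a plain (non-atomic) pointer is consistent with the ownership scheme above: the field appears to be read and written only by the thread that currently owns the page (tracked through `xthread_id` and bit 0 of `xthread_free`), with `mi_page_set_heap` clearing it to NULL on abandonment, so no cross-thread atomic access to the field itself seems required.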
@@ -401,7 +401,7 @@ typedef struct mi_padding_s {
 // A heap owns a set of pages.
 struct mi_heap_s {
   mi_tld_t* tld;
-  _Atomic(mi_block_t*) thread_delayed_free;
+  // _Atomic(mi_block_t*) thread_delayed_free;
   mi_threadid_t thread_id;      // thread this heap belongs to
   mi_arena_id_t arena_id;       // arena id if the heap belongs to a specific arena (or 0)
   uintptr_t cookie;             // random cookie to verify pointers (see `_mi_ptr_cookie`)
@@ -412,6 +412,7 @@ struct mi_heap_s {
   size_t page_retired_max;      // largest retired index into the `pages` array.
   mi_heap_t* next;              // list of heaps per thread
   bool no_reclaim;              // `true` if this heap should not reclaim abandoned pages
+  bool eager_abandon;           // `true` if this heap can abandon pages to reduce memory footprint
   uint8_t tag;                  // custom tag, can be used for separating heaps based on the object types
   #if MI_GUARDED
   size_t guarded_size_min;      // minimal size for guarded objects
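
Presumably the per-heap `eager_abandon` flag is initialized from the new `mi_option_eager_abandon` option when a heap is created, so the option table need not be consulted on hot paths.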