Merge branch 'dev3-bin' of https://github.com/microsoft/mimalloc into dev3-bin

This commit is contained in:
Daan Leijen 2025-01-31 11:06:04 -08:00
commit b7665feb2f
8 changed files with 168 additions and 140 deletions

View file

@ -25,6 +25,23 @@ There are four requirements to make the overriding work well:
list of the final executable (so it can intercept all potential allocations).
You can use `minject -l <exe>` to check this if needed.
```csharp
┌──────────────┐
│ Your Program │
└────┬─────────┘
│ mi_version() ┌───────────────┐ ┌───────────────────────┐
├──────────────►│ mimalloc.dll ├────►│ mimalloc-redirect.dll │
│ └──────┬────────┘ └───────────────────────┘
│ ▼
│ malloc() etc. ┌──────────────┐
├──────────────►│ ucrtbase.dll │
│ └──────────────┘
└──────────────► ...
```
For best performance on Windows with C++, it
is also recommended to override the `new`/`delete` operations (by including
[`mimalloc-new-delete.h`](../include/mimalloc-new-delete.h)

View file

@ -205,9 +205,8 @@ static inline size_t mi_ctz(size_t x) {
#elif mi_has_builtinz(ctz)
return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS);
#elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
if (x==0) return MI_SIZE_BITS;
size_t r;
__asm ("bsf\t%1, %0" : "=r"(r) : "r"(x) : "cc");
size_t r = MI_SIZE_BITS; // bsf leaves destination unmodified if the argument is 0 (see <https://github.com/llvm/llvm-project/pull/102885>)
__asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc");
return r;
#elif MI_HAS_FAST_POPCOUNT
return (x!=0 ? (mi_popcount(x^(x-1))-1) : MI_SIZE_BITS);

View file

@ -597,58 +597,6 @@ static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
return page->heap;
}
// Attach `page` to `heap`, or detach it when `heap` is NULL.
// The heap pointer (and heap tag) are written first; the thread id is then
// published with a release store that replaces the whole `xthread_id` word
// (the heap's thread id, or 0 when detaching).
static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
  if (heap == NULL) {
    page->heap = NULL;
    mi_atomic_store_release(&page->xthread_id,0);
    return;
  }
  page->heap = heap;
  page->heap_tag = heap->tag;
  mi_atomic_store_release(&page->xthread_id, heap->tld->thread_id);
}
// Thread free flag helpers
// Extract the block pointer from a thread-free value by masking off bit 0.
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
  tf &= ~1;   // bit 0 is not part of the pointer
  return (mi_block_t*)tf;
}
// True when bit 0 (the "owned" bit) of a thread-free value is set.
static inline bool mi_tf_is_owned(mi_thread_free_t tf) {
  const bool owned = ((tf & 1) != 0);
  return owned;
}
// Build a thread-free value from a block pointer, setting bit 0 when `owned` is true.
static inline mi_thread_free_t mi_tf_create(mi_block_t* block, bool owned) {
  uintptr_t bits = (uintptr_t)block;
  if (owned) { bits |= 1; }
  return (mi_thread_free_t)bits;
}
// Thread id of thread that owns this page (with flags in the bottom 2 bits)
static inline mi_threadid_t mi_page_xthread_id(const mi_page_t* page) {
return mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_id);
}
// Thread id of the page's owner with the bits in `MI_PAGE_FLAG_MASK` cleared.
static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
  const mi_threadid_t xtid = mi_page_xthread_id(page);
  return (xtid & ~MI_PAGE_FLAG_MASK);
}
// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
return mi_tf_block(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
}
// Owned?
static inline bool mi_page_is_owned(const mi_page_t* page) {
return mi_tf_is_owned(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
}
//static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
// return mi_tf_make(mi_tf_block(tf),delayed);
//}
//static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
// return mi_tf_make(block, mi_tf_delayed(tf));
//}
// are all blocks in a page freed?
// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
@ -657,12 +605,6 @@ static inline bool mi_page_all_free(const mi_page_t* page) {
return (page->used == 0);
}
// Can this page still provide a block? True when fewer blocks are in use than
// reserved, or when the thread-free list is non-empty.
static inline bool mi_page_has_any_available(const mi_page_t* page) {
  mi_assert_internal(page != NULL && page->reserved > 0);
  if (page->used < page->reserved) { return true; }
  return (mi_page_thread_free(page) != NULL);
}
// are there immediately available blocks, i.e. blocks available on the free list.
static inline bool mi_page_immediate_available(const mi_page_t* page) {
mi_assert_internal(page != NULL);
@ -698,25 +640,6 @@ static inline bool mi_page_is_used_at_frac(const mi_page_t* page, uint16_t n) {
return (page->reserved - page->used <= frac);
}
static inline bool mi_page_is_abandoned(const mi_page_t* page) {
// note: the xheap field of an abandoned heap is set to the subproc (for fast reclaim-on-free)
return (mi_page_xthread_id(page) <= MI_PAGE_IS_ABANDONED_MAPPED);
}
static inline bool mi_page_is_abandoned_mapped(const mi_page_t* page) {
return (mi_page_xthread_id(page) == MI_PAGE_IS_ABANDONED_MAPPED);
}
// Mark an abandoned page as "mapped" by OR-ing the `MI_PAGE_IS_ABANDONED_MAPPED`
// bit into `xthread_id` (relaxed atomic). The page must already be abandoned (asserted).
static inline void mi_page_set_abandoned_mapped(mi_page_t* page) {
mi_assert_internal(mi_page_is_abandoned(page));
mi_atomic_or_relaxed(&page->xthread_id, MI_PAGE_IS_ABANDONED_MAPPED);
}
// Clear the `MI_PAGE_IS_ABANDONED_MAPPED` bit in `xthread_id` (relaxed atomic AND
// with the complement), leaving all other bits untouched.
// The page must currently be abandoned-mapped (asserted).
static inline void mi_page_clear_abandoned_mapped(mi_page_t* page) {
mi_assert_internal(mi_page_is_abandoned_mapped(page));
mi_atomic_and_relaxed(&page->xthread_id, ~MI_PAGE_IS_ABANDONED_MAPPED);
}
static inline bool mi_page_is_huge(const mi_page_t* page) {
return (page->block_size > MI_LARGE_MAX_OBJ_SIZE ||
@ -730,6 +653,109 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
}
//-----------------------------------------------------------
// Page thread id and flags
//-----------------------------------------------------------
// Thread id of thread that owns this page (with flags in the bottom 2 bits)
static inline mi_threadid_t mi_page_xthread_id(const mi_page_t* page) {
return mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_id);
}
// Thread id of the page's owner with the bits in `MI_PAGE_FLAG_MASK` cleared.
static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
  const mi_threadid_t xtid = mi_page_xthread_id(page);
  return (xtid & ~MI_PAGE_FLAG_MASK);
}
static inline mi_page_flags_t mi_page_flags(const mi_page_t* page) {
return (mi_page_xthread_id(page) & MI_PAGE_FLAG_MASK);
}
// Atomically (relaxed) set `newflag` in `xthread_id` when `set` is true,
// or clear it when `set` is false.
static inline void mi_page_flags_set(mi_page_t* page, bool set, mi_page_flags_t newflag) {
  if (!set) {
    mi_atomic_and_relaxed(&page->xthread_id, ~newflag);
    return;
  }
  mi_atomic_or_relaxed(&page->xthread_id, newflag);
}
static inline bool mi_page_is_in_full(const mi_page_t* page) {
return ((mi_page_flags(page) & MI_PAGE_IN_FULL_QUEUE) != 0);
}
// Set (`in_full == true`) or clear (`in_full == false`) the `MI_PAGE_IN_FULL_QUEUE` flag of `page`.
static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
mi_page_flags_set(page, in_full, MI_PAGE_IN_FULL_QUEUE);
}
static inline bool mi_page_has_aligned(const mi_page_t* page) {
return ((mi_page_flags(page) & MI_PAGE_HAS_ALIGNED) != 0);
}
// Set (`has_aligned == true`) or clear (`has_aligned == false`) the `MI_PAGE_HAS_ALIGNED` flag of `page`.
static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
mi_page_flags_set(page, has_aligned, MI_PAGE_HAS_ALIGNED);
}
// Attach `page` to `heap`, or detach it when `heap` is NULL.
// The page's current flag bits are read first and carried over into the new
// `xthread_id`, which becomes the heap's thread id (or `MI_THREADID_ABANDONED`
// when detaching) OR-ed with those flags. The heap fields are written before
// the new `xthread_id` is published with a release store.
static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
  // mi_assert_internal(!mi_page_is_in_full(page)); // can happen when destroying pages on heap_destroy
  const mi_page_flags_t flags = mi_page_flags(page);
  mi_threadid_t tid;
  if (heap == NULL) {
    tid = MI_THREADID_ABANDONED | flags;
    page->heap = NULL;
  }
  else {
    tid = heap->tld->thread_id | flags;
    page->heap = heap;
    page->heap_tag = heap->tag;
  }
  mi_atomic_store_release(&page->xthread_id, tid);
}
static inline bool mi_page_is_abandoned(const mi_page_t* page) {
// note: the xheap field of an abandoned heap is set to the subproc (for fast reclaim-on-free)
return (mi_page_thread_id(page) <= MI_THREADID_ABANDONED_MAPPED);
}
static inline bool mi_page_is_abandoned_mapped(const mi_page_t* page) {
return (mi_page_thread_id(page) == MI_THREADID_ABANDONED_MAPPED);
}
// Turn an abandoned page into an abandoned-mapped one by OR-ing
// `MI_THREADID_ABANDONED_MAPPED` into `xthread_id` (relaxed atomic).
// The page must already be abandoned (asserted).
static inline void mi_page_set_abandoned_mapped(mi_page_t* page) {
mi_assert_internal(mi_page_is_abandoned(page));
mi_atomic_or_relaxed(&page->xthread_id, MI_THREADID_ABANDONED_MAPPED);
}
// Clear the abandoned-mapped state: AND-ing `xthread_id` with `MI_PAGE_FLAG_MASK`
// (relaxed atomic) keeps only the bits inside the flag mask and zeroes every other bit.
// The page must currently be abandoned-mapped (asserted).
static inline void mi_page_clear_abandoned_mapped(mi_page_t* page) {
mi_assert_internal(mi_page_is_abandoned_mapped(page));
mi_atomic_and_relaxed(&page->xthread_id, MI_PAGE_FLAG_MASK);
}
//-----------------------------------------------------------
// Thread free list and ownership
//-----------------------------------------------------------
// Thread free flag helpers
// Extract the block pointer from a thread-free value by masking off bit 0.
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
  tf &= ~1;   // bit 0 is not part of the pointer
  return (mi_block_t*)tf;
}
// True when bit 0 (the "owned" bit) of a thread-free value is set.
static inline bool mi_tf_is_owned(mi_thread_free_t tf) {
  const bool owned = ((tf & 1) != 0);
  return owned;
}
// Build a thread-free value from a block pointer, setting bit 0 when `owned` is true.
static inline mi_thread_free_t mi_tf_create(mi_block_t* block, bool owned) {
  uintptr_t bits = (uintptr_t)block;
  if (owned) { bits |= 1; }
  return (mi_thread_free_t)bits;
}
// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
return mi_tf_block(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
}
// Can this page still provide a block? True when fewer blocks are in use than
// reserved, or when the thread-free list is non-empty.
static inline bool mi_page_has_any_available(const mi_page_t* page) {
  mi_assert_internal(page != NULL && page->reserved > 0);
  if (page->used < page->reserved) { return true; }
  return (mi_page_thread_free(page) != NULL);
}
// Owned?
static inline bool mi_page_is_owned(const mi_page_t* page) {
return mi_tf_is_owned(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free));
}
// Unown a page that is currently owned
static inline void _mi_page_unown_unconditional(mi_page_t* page) {
mi_assert_internal(mi_page_is_owned(page));
@ -738,7 +764,6 @@ static inline void _mi_page_unown_unconditional(mi_page_t* page) {
mi_assert_internal((old&1)==1); MI_UNUSED(old);
}
// get ownership if it is not yet owned
static inline bool mi_page_try_claim_ownership(mi_page_t* page) {
const uintptr_t old = mi_atomic_or_acq_rel(&page->xthread_free, 1);
@ -769,37 +794,6 @@ static inline bool _mi_page_unown(mi_page_t* page) {
return false;
}
//-----------------------------------------------------------
// Page flags
//-----------------------------------------------------------
static inline mi_page_flags_t mi_page_flags(const mi_page_t* page) {
return (mi_page_xthread_id(page) & MI_PAGE_FLAG_MASK);
}
// Atomically (relaxed) set `newflag` in `xthread_id` when `set` is true,
// or clear it when `set` is false.
static inline void mi_page_flags_set(mi_page_t* page, bool set, mi_page_flags_t newflag) {
  if (!set) {
    mi_atomic_and_relaxed(&page->xthread_id, ~newflag);
    return;
  }
  mi_atomic_or_relaxed(&page->xthread_id, newflag);
}
static inline bool mi_page_is_in_full(const mi_page_t* page) {
return ((mi_page_flags(page) & MI_PAGE_IN_FULL_QUEUE) != 0);
}
// Set (`in_full == true`) or clear (`in_full == false`) the `MI_PAGE_IN_FULL_QUEUE` flag of `page`.
static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
mi_page_flags_set(page, in_full, MI_PAGE_IN_FULL_QUEUE);
}
static inline bool mi_page_has_aligned(const mi_page_t* page) {
return ((mi_page_flags(page) & MI_PAGE_HAS_ALIGNED) != 0);
}
// Set (`has_aligned == true`) or clear (`has_aligned == false`) the `MI_PAGE_HAS_ALIGNED` flag of `page`.
static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
mi_page_flags_set(page, has_aligned, MI_PAGE_HAS_ALIGNED);
}
/* -------------------------------------------------------------------
Guarded objects

View file

@ -242,16 +242,18 @@ typedef struct mi_block_s {
} mi_block_t;
// The page flags are put in the bottom 3 bits of the thread_id (for a fast test in `mi_free`)
// The page flags are put in the bottom 2 bits of the thread_id (for a fast test in `mi_free`)
// `has_aligned` is true if the page has pointers at an offset in a block (so we unalign before free-ing)
// `in_full_queue` is true if the page is full and resides in the full queue (so we move it to a regular queue on free-ing)
// `is_abandoned_mapped` is true if the page is abandoned (thread_id==0) and it is in an arena so can be quickly found for reuse ("mapped")
#define MI_PAGE_IN_FULL_QUEUE MI_ZU(0x01)
#define MI_PAGE_HAS_ALIGNED MI_ZU(0x02)
#define MI_PAGE_IS_ABANDONED_MAPPED MI_ZU(0x04)
#define MI_PAGE_FLAG_MASK MI_ZU(0x07)
#define MI_PAGE_FLAG_MASK MI_ZU(0x03)
typedef size_t mi_page_flags_t;
// There are two special thread ids: 0 for abandoned pages, and 4 for abandoned & mapped pages --
// abandoned-mapped pages are abandoned but also mapped in an arena so can be quickly found for reuse.
#define MI_THREADID_ABANDONED MI_ZU(0)
#define MI_THREADID_ABANDONED_MAPPED (MI_PAGE_FLAG_MASK + 1)
// Thread free list.
// Points to a list of blocks that are freed by other threads.
@ -292,7 +294,7 @@ typedef uint8_t mi_heaptag_t;
// - Using `uint16_t` does not seem to slow things down
typedef struct mi_page_s {
_Atomic(mi_threadid_t) xthread_id; // thread this page belongs to. (= `heap->thread_id (or 0 if abandoned) | page_flags`)
_Atomic(mi_threadid_t) xthread_id; // thread this page belongs to. (= `heap->thread_id (or 0 or 4 if abandoned) | page_flags`)
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)

View file

@ -1833,9 +1833,15 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t*
// find accessed size
size_t asize;
// scan the commit map for the highest entry
// scan the commit map for the highest entry
size_t idx;
if (mi_bitmap_bsr(arena->slices_committed, &idx)) {
asize = (idx + 1)* MI_ARENA_SLICE_SIZE;
//if (mi_bitmap_bsr(arena->slices_committed, &idx)) {
// asize = (idx + 1)* MI_ARENA_SLICE_SIZE;
//}
if (mi_bitmap_bsr(arena->pages, &idx)) {
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, idx);
const size_t page_slice_count = page->memid.mem.arena.slice_count;
asize = mi_size_of_slices(idx + page_slice_count);
}
else {
asize = mi_arena_info_slices(arena) * MI_ARENA_SLICE_SIZE;

View file

@ -153,11 +153,11 @@ static inline bool mi_bfield_atomic_setX(_Atomic(mi_bfield_t)*b, size_t* already
return (old==0);
}
// Atomically clear an entire bfield by exchanging it with zero (release order).
// Returns true only if every bit was set before the exchange.
// `*all_clear` (when non-NULL) is always set to true.
static inline bool mi_bfield_atomic_clearX(_Atomic(mi_bfield_t)*b, bool* all_clear) {
  const mi_bfield_t prev = mi_atomic_exchange_release(b, mi_bfield_zero());
  const bool was_full = (~prev==0);
  if (all_clear != NULL) {
    *all_clear = true;   // the field was just overwritten with zero
  }
  return was_full;
}
// static inline bool mi_bfield_atomic_clearX(_Atomic(mi_bfield_t)*b, bool* all_clear) {
// const mi_bfield_t old = mi_atomic_exchange_release(b, mi_bfield_zero());
// if (all_clear!=NULL) { *all_clear = true; }
// return (~old==0);
// }
// ------- mi_bfield_atomic_try_clear ---------------------------------------
@ -434,12 +434,12 @@ static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t
}
// Try to clear one whole bfield of a chunk.
// `cidx` is a bit index inside the chunk and must be a multiple of `MI_BFIELD_BITS`
// (both asserted); the result and `pmaybe_all_clear` come from `mi_bfield_atomic_try_clearX`.
static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) {
  mi_assert_internal(cidx < MI_BCHUNK_BITS);
  mi_assert_internal((cidx%MI_BFIELD_BITS) == 0);
  const size_t field_idx = cidx / MI_BFIELD_BITS;   // which bfield of the chunk holds bit `cidx`
  return mi_bfield_atomic_try_clearX(&chunk->bfields[field_idx], pmaybe_all_clear);
}
// static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) {
// mi_assert_internal(cidx < MI_BCHUNK_BITS);
// mi_assert_internal((cidx%MI_BFIELD_BITS) == 0);
// const size_t i = cidx / MI_BFIELD_BITS;
// return mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear);
// }
// Try to atomically clear a sequence of `n` bits within a chunk.
// Returns true if all bits transitioned from 1 to 0,
@ -717,6 +717,7 @@ static inline bool mi_bchunk_try_find_and_clear_8(mi_bchunk_t* chunk, size_t n,
// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success.
// Used to find large size pages in the free blocks.
// todo: try neon version
/*
static mi_decl_noinline bool mi_bchunk_try_find_and_clearX(mi_bchunk_t* chunk, size_t* pidx) {
#if MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==512)
while (true) {
@ -759,6 +760,7 @@ static inline bool mi_bchunk_try_find_and_clear_X(mi_bchunk_t* chunk, size_t n,
mi_assert_internal(n==MI_BFIELD_BITS); MI_UNUSED(n);
return mi_bchunk_try_find_and_clearX(chunk, pidx);
}
*/
// find a sequence of `n` bits in a chunk with `0 < n <= MI_BFIELD_BITS` with all bits set,
// and try to clear them atomically.

View file

@ -298,17 +298,17 @@ void _mi_page_map_unregister(mi_page_t* page) {
mi_assert_internal(_mi_page_map != NULL);
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL);
if mi_unlikely(_mi_page_map == NULL) return;
// get index and count
size_t slice_count;
size_t sub_idx;
const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
// unset the offsets
// mi_page_map_set_range(NULL, idx, sub_idx, slice_count);
mi_page_map_set_range(NULL, idx, sub_idx, slice_count);
}
void _mi_page_map_unregister_range(void* start, size_t size) {
if mi_unlikely(_mi_page_map == NULL) return;
const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
size_t sub_idx;
const uintptr_t idx = _mi_page_map_index(start, &sub_idx);

View file

@ -70,6 +70,9 @@ static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL;
static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL;
static PGetNumaProcessorNode pGetNumaProcessorNode = NULL;
// Function pointer type for `GetPhysicallyInstalledSystemMemory`.
// Available after Windows XP, so the function is looked up with `GetProcAddress`
// at initialization instead of being called directly.
typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes );
//---------------------------------------------
// Enable large page support dynamically (if possible)
//---------------------------------------------
@ -140,13 +143,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
config->virtual_address_bits = vbits;
}
// get physical memory
ULONGLONG memInKiB = 0;
if (GetPhysicallyInstalledSystemMemory(&memInKiB)) {
if (memInKiB > 0 && memInKiB < (SIZE_MAX / MI_KiB)) {
config->physical_memory = (size_t)memInKiB * MI_KiB;
}
}
// get the VirtualAlloc2 function
HINSTANCE hDll;
hDll = LoadLibrary(TEXT("kernelbase.dll"));
@ -169,8 +166,19 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx");
pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx");
pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode");
// Get physical memory (not available on XP, so check dynamically)
PGetPhysicallyInstalledSystemMemory pGetPhysicallyInstalledSystemMemory = (PGetPhysicallyInstalledSystemMemory)(void (*)(void))GetProcAddress(hDll,"GetPhysicallyInstalledSystemMemory");
if (pGetPhysicallyInstalledSystemMemory != NULL) {
ULONGLONG memInKiB = 0;
if ((*pGetPhysicallyInstalledSystemMemory)(&memInKiB)) {
if (memInKiB > 0 && memInKiB < (SIZE_MAX / MI_KiB)) {
config->physical_memory = (size_t)memInKiB * MI_KiB;
}
}
}
FreeLibrary(hDll);
}
// Enable large/huge OS page support?
if (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
win_enable_large_os_pages(&config->large_page_size);
}
@ -834,7 +842,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
#endif
bool _mi_prim_thread_is_in_threadpool(void) {
#if (MI_ARCH_X64 || MI_ARCH_X86)
#if (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64)
if (win_major_version >= 6) {
// check if this thread belongs to a windows threadpool
// see: <https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/pebteb/teb/index.htm>