From fe5a3141142d27f1a0a54f95e8cb397b21ae19f3 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 2 Dec 2024 19:31:36 -0800 Subject: [PATCH] add base and size to OS memid --- include/mimalloc/bits.h | 24 ++++++++++++ include/mimalloc/internal.h | 4 +- include/mimalloc/types.h | 1 + src/arena.c | 14 +++---- src/bitmap.c | 73 +++++++++++++++++++++++++++---------- src/bitmap.h | 6 +-- src/options.c | 2 +- src/os.c | 18 +++++---- test/test-stress.c | 2 +- 9 files changed, 104 insertions(+), 40 deletions(-) diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index 90d56b4f..f3bbe3bc 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -237,6 +237,30 @@ static inline uint32_t mi_ctz32(uint32_t x) { #define MI_HAS_FAST_BITSCAN 1 #endif + + +static inline size_t mi_popcount(size_t x) { +#if mi_has_builtin_size(popcount) + return mi_builtin_size(popcount)(x); +#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + #if MI_SIZE_BITS==32 + return __popcnt(x); + #else + return __popcnt64(x); + #endif +#elif MI_ARCH_X64 && defined(__BMI1__) + return (size_t)_mm_popcnt_u64(x); +#else + #define MI_HAS_FAST_POPCOUNT 0 + #error "define a generic popcount (see the sketch after the patch)" +#endif +} + +#ifndef MI_HAS_FAST_POPCOUNT +#define MI_HAS_FAST_POPCOUNT 1 +#endif + + /* -------------------------------------------------------------------------------- find trailing/leading zero (bit scan forward/reverse) -------------------------------------------------------------------------------- */ diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index afdfe822..7d263d47 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -844,8 +844,10 @@ static inline mi_memid_t _mi_memid_none(void) { return _mi_memid_create(MI_MEM_NONE); } -static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { +static inline mi_memid_t _mi_memid_create_os(void* base, size_t size, bool committed, bool is_zero, bool is_large) { mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.mem.os.base = base; + memid.mem.os.size = size; memid.initially_committed = committed; memid.initially_zero = is_zero; memid.is_pinned = is_large; diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 6f2f9c5f..dafd25f1 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -171,6 +171,7 @@ static inline bool mi_memkind_is_os(mi_memkind_t memkind) { typedef struct mi_memid_os_info { void* base; // actual base address of the block (used for offset aligned allocations) size_t alignment; // alignment at allocation + size_t size; // full allocated size (measured from `base`) } mi_memid_os_info_t; typedef struct mi_memid_arena_info { diff --git a/src/arena.c b/src/arena.c index 194854a2..08b6c98d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -204,17 +204,19 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; - bool all_already_committed; - mi_bitmap_setN(&arena->slices_committed, slice_index, slice_count, &all_already_committed); - if (!all_already_committed) { + size_t already_committed_count = 0; + mi_bitmap_setN(&arena->slices_committed, slice_index, slice_count, &already_committed_count); + if (already_committed_count < slice_count) { + // recommit the full range (decrease the stat first so the already committed part is not counted twice) bool commit_zero = false; + mi_stat_decrease(_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); if (!_mi_os_commit(p, 
mi_size_of_slices(slice_count), &commit_zero, NULL)) { memid->initially_committed = false; } else { if (commit_zero) { memid->initially_zero = true; } } - } + } } else { // no need to commit, but check if already fully committed @@ -622,10 +624,6 @@ void _mi_arena_page_free(mi_page_t* page) { mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->next==NULL); - #if MI_STAT > 1 - _mi_page_free_collect(page, true); - #endif - #if MI_DEBUG>1 if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { size_t bin = _mi_bin(mi_page_block_size(page)); diff --git a/src/bitmap.c b/src/bitmap.c index ed991441..c7c78dec 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -22,6 +22,11 @@ static inline size_t mi_bfield_ctz(mi_bfield_t x) { return mi_ctz(x); } + +static inline size_t mi_bfield_popcount(mi_bfield_t x) { + return mi_popcount(x); +} + //static inline size_t mi_bfield_clz(mi_bfield_t x) { // return mi_clz(x); //} @@ -70,26 +75,57 @@ static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, s } } +// Set a pair of bits atomically, and return true if the mask bits transitioned from all 0's to 1's. +static inline bool mi_bfield_atomic_set2(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_already_set) { + mi_assert_internal(idx < MI_BFIELD_BITS-1); + const size_t mask = (mi_bfield_t)0x03 << idx; + mi_bfield_t old = mi_atomic_load_relaxed(b); + while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits until success + if (all_already_set!=NULL) { *all_already_set = ((old&mask)==mask); } + return ((old&mask) == 0); +} + +// Clear a pair of bits atomically, and return true if the mask bits transitioned from all 1's to 0's +static inline bool mi_bfield_atomic_clear2(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_already_clear) { + mi_assert_internal(idx < MI_BFIELD_BITS-1); + const size_t mask = (mi_bfield_t)0x03 << idx; + mi_bfield_t old = mi_atomic_load_relaxed(b); + while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success + if (all_already_clear!=NULL) { *all_already_clear = ((old&mask) == 0); } + return ((old&mask) == mask); +} + +// Set/clear a pair of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's) +static inline bool mi_bfield_atomic_xset2(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx, bool* already_xset) { + if (set) { + return mi_bfield_atomic_set2(b, idx, already_xset); + } + else { + return mi_bfield_atomic_clear2(b, idx, already_xset); + } +} + + // Set a mask set of bits atomically, and return true if the mask bits transitioned from all 0's to 1's. 
-static inline bool mi_bfield_atomic_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_set) { +static inline bool mi_bfield_atomic_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, size_t* already_set) { mi_assert_internal(mask != 0); mi_bfield_t old = mi_atomic_load_relaxed(b); while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits until success - if (already_set!=NULL) { *already_set = ((old&mask)==mask); } + if (already_set!=NULL) { *already_set = mi_bfield_popcount(old&mask); } return ((old&mask) == 0); } // Clear a mask set of bits atomically, and return true if the mask bits transitioned from all 1's to 0's -static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_clear) { +static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, size_t* already_clear) { mi_assert_internal(mask != 0); mi_bfield_t old = mi_atomic_load_relaxed(b); while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success - if (already_clear!=NULL) { *already_clear = ((old&mask)==0); } + if (already_clear!=NULL) { *already_clear = mi_bfield_popcount(~old & mask); } return ((old&mask) == mask); } // Set/clear a mask set of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's) -static inline bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) { +static inline bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, size_t* already_xset) { mi_assert_internal(mask != 0); if (set) { return mi_bfield_atomic_set_mask(b, mask, already_xset); @@ -225,9 +261,8 @@ static inline bool mi_bitmap_chunk_xset2(mi_bit_t set, mi_bitmap_chunk_t* chunk, const size_t i = cidx / MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS; mi_assert_internal(idx < MI_BFIELD_BITS-1); - mi_assert_internal((idx%2)==0); - const size_t mask = (mi_bfield_t)0x03 << idx; - return mi_bfield_atomic_xset_mask(set, &chunk->bfields[i], mask, all_already_xset); + mi_assert_internal((idx%2)==0); + return mi_bfield_atomic_xset2(set, &chunk->bfields[i], idx, all_already_xset); } static inline bool mi_bitmap_chunk_set2(mi_bitmap_chunk_t* chunk, size_t cidx, bool* all_already_set) { @@ -241,11 +276,11 @@ static inline bool mi_bitmap_chunk_clear2(mi_bitmap_chunk_t* chunk, size_t cidx, // Set/clear a sequence of `n` bits within a chunk. // Returns true if all bits transitioned from 0 to 1 (or 1 to 0). -static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* pall_already_xset) { +static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, size_t* pall_already_xset) { mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); mi_assert_internal(n>0); bool all_transition = true; - bool all_already_xset = true; + size_t all_already_xset = 0; size_t idx = cidx % MI_BFIELD_BITS; size_t field = cidx / MI_BFIELD_BITS; while (n > 0) { @@ -254,9 +289,9 @@ static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t mi_assert_internal(idx + m <= MI_BFIELD_BITS); mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); const size_t mask = (m == MI_BFIELD_BITS ? 
~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx); - bool already_xset; + size_t already_xset = 0; const bool transition = mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset ); - all_already_xset = all_already_xset && already_xset; + all_already_xset += already_xset; // next field field++; idx = 0; @@ -267,12 +302,12 @@ static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t } -static inline bool mi_bitmap_chunk_setN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* all_allready_set) { - return mi_bitmap_chunk_xsetN(MI_BIT_SET, chunk, cidx, n, all_allready_set); +static inline bool mi_bitmap_chunk_setN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { + return mi_bitmap_chunk_xsetN(MI_BIT_SET, chunk, cidx, n, already_set); } -static inline bool mi_bitmap_chunk_clearN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* all_allready_clear) { - return mi_bitmap_chunk_xsetN(MI_BIT_CLEAR, chunk, cidx, n, all_allready_clear); +static inline bool mi_bitmap_chunk_clearN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, size_t* already_clear) { + return mi_bitmap_chunk_xsetN(MI_BIT_CLEAR, chunk, cidx, n, already_clear); } @@ -829,7 +864,7 @@ bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! -bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_xset ) { +bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset ) { mi_assert_internal(n>0); mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); @@ -846,11 +881,11 @@ bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bo // first set the anyset since it is a conservative approximation (increases epoch) mi_bitmap_anyset_set(bitmap, chunk_idx); // then actually try to set it atomically - return mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, n, all_already_xset); + return mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, n, already_xset); } else { const size_t epoch = mi_bitmap_epoch(bitmap); - bool cleared = mi_bitmap_chunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, all_already_xset); + bool cleared = mi_bitmap_chunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, already_xset); if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch); } diff --git a/src/bitmap.h b/src/bitmap.h index 62aab7a7..8c961fe1 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -65,10 +65,10 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n); // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's). // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! // If `already_xset` is not NULL, it is set to the number of bits that were already set/cleared. 
-bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_xset); +bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset); -static inline bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_set) { - return mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, n, all_already_set); +static inline bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set) { + return mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, n, already_set); } static inline bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) { diff --git a/src/options.c b/src/options.c index b69058cc..759d096d 100644 --- a/src/options.c +++ b/src/options.c @@ -158,7 +158,7 @@ static mi_option_desc_t options[_mi_option_last] = UNINIT, MI_OPTION(guarded_sample_rate)}, // 1 out of N allocations in the min/max range will be guarded (=4000) { 0, UNINIT, MI_OPTION(guarded_sample_seed)}, { 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable. - { 0, UNINIT, MI_OPTION(eager_abandon) }, + { 1, UNINIT, MI_OPTION(eager_abandon) }, }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 0aa0a681..bac59437 100644 --- a/src/os.c +++ b/src/os.c @@ -128,21 +128,24 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st if (err != 0) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } - if (still_committed) { _mi_stat_decrease(&stats->committed, size); } + if (still_committed) { + _mi_stat_decrease(&stats->committed, size); + } _mi_stat_decrease(&stats->reserved, size); } void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats) { if (stats == NULL) stats = &_mi_stats_main; if (mi_memkind_is_os(memid.memkind)) { - size_t csize = _mi_os_good_alloc_size(size); + size_t csize = memid.mem.os.size; + if (csize==0) { csize = _mi_os_good_alloc_size(size); } void* base = addr; // different base? 
(due to alignment) - if (memid.mem.os.base != NULL) { + if (memid.mem.os.base != base) { mi_assert(memid.mem.os.base <= addr); mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr); base = memid.mem.os.base; - csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); + if (memid.mem.os.size==0) { csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); } } // free it if (memid.memkind == MI_MEM_OS_HUGE) { @@ -296,7 +299,7 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); if (p != NULL) { - *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); + *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); } return p; } @@ -315,9 +318,10 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, stats ); if (p != NULL) { - *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); memid->mem.os.base = os_base; memid->mem.os.alignment = alignment; + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned } return p; } @@ -642,7 +646,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } if (page != 0) { mi_assert(start != NULL); - *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); + *memid = _mi_memid_create_os(start, page * MI_HUGE_OS_PAGE_SIZE, true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; mi_assert(memid->is_pinned); #ifdef MI_TRACK_ASAN diff --git a/test/test-stress.c b/test/test-stress.c index 61d1424a..487f7215 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -40,7 +40,7 @@ static int ITER = 20; static int THREADS = 8; static int SCALE = 10; static int ITER = 10; -#elif 1 +#elif 0 static int THREADS = 4; static int SCALE = 100; static int ITER = 50;
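
Note on the `#error "define a generic popcount"` branch in bits.h: platforms without a builtin or intrinsic still need a fallback. A minimal sketch of what such a fallback could look like (an assumption, not upstream's eventual code; it relies only on `SIZE_MAX` from <stdint.h> and the `MI_SIZE_BITS` macro already used above) is the classic SWAR reduction:

static inline size_t mi_popcount_generic(size_t x) {
  // sum adjacent bit pairs, then nibbles, then bytes
  x = x - ((x >> 1) & (SIZE_MAX/3));                         // 0x5555... mask
  x = (x & (SIZE_MAX/15*3)) + ((x >> 2) & (SIZE_MAX/15*3));  // 0x3333... mask
  x = (x + (x >> 4)) & (SIZE_MAX/255*15);                    // 0x0f0f... mask
  // multiply by 0x0101... so the top byte accumulates the total, then shift it down
  return (x * (SIZE_MAX/255)) >> (MI_SIZE_BITS - 8);
}

Because the masks are derived from `SIZE_MAX`, the same code is correct for both 32-bit and 64-bit `size_t`.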
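
Note on the `bool*` to `size_t*` change in the bitmap API: reporting a count of already-set bits (rather than a single all-or-nothing flag) is what lets `mi_arena_try_alloc_at` decrease the committed statistic by exactly `mi_size_of_slices(already_committed_count)` before recommitting the whole range, so the already committed slices are not counted twice. A self-contained model of the new mask semantics (hypothetical names; C11 atomics and GCC/Clang's `__builtin_popcountll` stand in for mimalloc's wrappers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Atomically set the bits in `mask`; return true if they all transitioned
// from 0 to 1, and report how many of them were already set beforehand.
static bool atomic_set_mask(_Atomic uint64_t* b, uint64_t mask, size_t* already_set) {
  uint64_t old = atomic_load_explicit(b, memory_order_relaxed);
  while (!atomic_compare_exchange_weak_explicit(b, &old, old | mask,
                                                memory_order_acq_rel, memory_order_acquire)) { }
  if (already_set != NULL) { *already_set = (size_t)__builtin_popcountll(old & mask); }
  return ((old & mask) == 0);
}

int main(void) {
  _Atomic uint64_t field = 0x05;   // bits 0 and 2 are already set
  size_t already = 0;
  bool transitioned = atomic_set_mask(&field, 0x0F, &already);      // set bits 0..3
  printf("transitioned=%d already=%zu\n", transitioned, already);   // prints: transitioned=0 already=2
  return 0;
}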
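
Note on storing `base` and `size` in the OS memid: an aligned allocation may over-allocate and hand out an interior pointer, so the free path needs the original base and the full size instead of reconstructing them from the aligned address; the patch keeps `_mi_os_good_alloc_size(size)` and the base-offset adjustment only as fallbacks for memids with `size==0`. A standalone sketch of the pattern (hypothetical `alloc_aligned`/`free_aligned` built on `malloc`/`free`; the real code uses the OS primitives and assumes a power-of-two `alignment`):

#include <stdint.h>
#include <stdlib.h>

typedef struct { void* base; size_t size; } os_memid_t;  // models mi_memid_os_info_t

static void* alloc_aligned(size_t size, size_t alignment, os_memid_t* memid) {
  const size_t full = size + alignment;          // over-allocate so an aligned block always fits
  void* base = malloc(full);                     // stand-in for the OS allocation primitive
  if (base == NULL) return NULL;
  const uintptr_t p = ((uintptr_t)base + alignment - 1) & ~(uintptr_t)(alignment - 1);
  memid->base = base;                            // actual start of the block
  memid->size = full;                            // full size, measured from base
  return (void*)p;                               // possibly an interior pointer
}

static void free_aligned(os_memid_t memid) {
  free(memid.base);                              // free from the recorded base, not the aligned pointer
}

int main(void) {
  os_memid_t memid;
  void* p = alloc_aligned(1000, 64, &memid);
  if (p != NULL) { free_aligned(memid); }        // p itself is never passed to free
  return 0;
}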