From c62d27683528d1e994fcd20f471693236c32204c Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 11 Feb 2025 14:27:01 -0800 Subject: [PATCH 1/2] optimize mi_bin --- src/page-queue.c | 39 ++++++++++++++----------------- test/main-override-static.c | 46 +++++++++++++++++++++---------------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/page-queue.c b/src/page-queue.c index 67b54650..e2728a1f 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -58,26 +58,22 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. static inline uint8_t mi_bin(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; + size_t wsize = _mi_wsize_from_size(size); +#if defined(MI_ALIGN4W) + if mi_likely(wsize <= 4) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes +#elif defined(MI_ALIGN2W) + if mi_likely(wsize <= 8) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes +#else + if mi_likely(wsize <= 8) { + return (wsize == 0 ? 1 : wsize); } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_OBJ_WSIZE_MAX) { - bin = MI_BIN_HUGE; +#endif + else if mi_unlikely(wsize > MI_LARGE_OBJ_WSIZE_MAX) { + return MI_BIN_HUGE; } else { #if defined(MI_ALIGN4W) @@ -85,15 +81,14 @@ static inline uint8_t mi_bin(size_t size) { #endif wsize--; // find the highest bit - uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0 + const size_t b = mi_bsr(wsize); // note: wsize != 0 // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - mi_assert_internal(bin < MI_BIN_HUGE); + const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3; + mi_assert_internal(bin > 0 && bin < MI_BIN_HUGE); + return bin; } - mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE); - return bin; } diff --git a/test/main-override-static.c b/test/main-override-static.c index 6070794b..4a1f81dc 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -32,7 +32,10 @@ static void test_manage_os_memory(void); int main() { mi_version(); mi_stats_reset(); - test_manage_os_memory(); + + mi_bins(); + + // test_manage_os_memory(); // test_large_pages(); // detect double frees and heap corruption // double_free1(); @@ -40,7 +43,7 @@ int main() { // corrupt_free(); // block_overflow1(); // block_overflow2(); - test_canary_leak(); + // test_canary_leak(); // test_aslr(); // invalid_free(); // test_reserved(); @@ -48,8 +51,6 @@ int main() { // test_heap_walk(); // alloc_huge(); - // mi_bins(); - void* p1 = malloc(78); void* p2 = malloc(24); @@ -314,7 +315,7 @@ static void test_large_pages(void) { // bin size experiments // ------------------------------ -#if 0 +#if 1 #include #include @@ -376,31 +377,34 @@ static inline size_t _mi_wsize_from_size(size_t size) { return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); } +// #define MI_ALIGN2W + // Return the bin for a given field size. // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. -extern inline uint8_t _mi_bin8(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { +static inline size_t mi_bin(size_t wsize) { + // size_t wsize = _mi_wsize_from_size(size); + // size_t bin; + /*if (wsize <= 1) { bin = 1; } + */ #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + if (wsize <= 4) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + if (wsize <= 8) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; + if (wsize <= 8) { + return (wsize == 0 ? 1 : wsize); } #endif else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; + return MI_BIN_HUGE; } else { #if defined(MI_ALIGN4W) @@ -408,15 +412,17 @@ extern inline uint8_t _mi_bin8(size_t size) { #endif wsize--; // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); + const size_t b = _mi_bsr(wsize); // note: wsize != 0 // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3; + assert(bin > 0 && bin < MI_BIN_HUGE); + return bin; } - return bin; } + static inline uint8_t _mi_bin4(size_t size) { size_t wsize = _mi_wsize_from_size(size); uint8_t bin; @@ -472,7 +478,7 @@ static size_t _mi_binx8(size_t bsize) { } -static inline size_t mi_bin(size_t wsize) { +static inline size_t mi_binx(size_t wsize) { uint8_t bin; if (wsize <= 1) { bin = 1; From ece1defe5b2c0862e9dc34cf586c0fa0612a13d6 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 11 Feb 2025 14:35:21 -0800 Subject: [PATCH 2/2] parameter of clz/ctz is size_t --- include/mimalloc/internal.h | 68 ++++++++++++++++++------------------- src/bitmap.c | 10 +++--- src/heap.c | 2 +- src/page-queue.c | 12 +++---- src/prim/windows/prim.c | 2 +- test/main-override-static.c | 4 +-- 6 files changed, 49 insertions(+), 49 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 8d26de47..d965c275 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -213,8 +213,8 @@ void _mi_deferred_free(mi_heap_t* heap, bool force); void _mi_page_free_collect(mi_page_t* page,bool force); void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments -size_t _mi_bin_size(uint8_t bin); // for stats -uint8_t _mi_bin(size_t size); // for stats +size_t _mi_bin_size(size_t bin); // for stats +size_t _mi_bin(size_t size); // for stats // "heap.c" void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag); @@ -839,21 +839,21 @@ static inline size_t _mi_os_numa_node_count(void) { #include // LONG_MAX #define MI_HAVE_FAST_BITSCAN -static inline size_t mi_clz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; -#if (INTPTR_MAX == LONG_MAX) - return __builtin_clzl(x); -#else - return __builtin_clzll(x); -#endif +static inline size_t mi_clz(size_t x) { + if (x==0) return MI_SIZE_BITS; + #if (SIZE_MAX == ULONG_MAX) + return __builtin_clzl(x); + #else + return __builtin_clzll(x); + #endif } -static inline size_t mi_ctz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; -#if (INTPTR_MAX == LONG_MAX) - return __builtin_ctzl(x); -#else - return __builtin_ctzll(x); -#endif +static inline size_t mi_ctz(size_t x) { + if (x==0) return MI_SIZE_BITS; + #if (SIZE_MAX == ULONG_MAX) + return __builtin_ctzl(x); + #else + return __builtin_ctzll(x); + #endif } #elif defined(_MSC_VER) @@ -861,24 +861,24 @@ static inline size_t mi_ctz(uintptr_t x) { #include // LONG_MAX #include // BitScanReverse64 #define MI_HAVE_FAST_BITSCAN -static inline size_t mi_clz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; +static inline size_t mi_clz(size_t x) { + if (x==0) return MI_SIZE_BITS; unsigned long idx; -#if (INTPTR_MAX == LONG_MAX) - _BitScanReverse(&idx, x); -#else - _BitScanReverse64(&idx, x); -#endif - return ((MI_INTPTR_BITS - 1) - idx); + #if (SIZE_MAX == ULONG_MAX) + _BitScanReverse(&idx, x); + #else + _BitScanReverse64(&idx, x); + #endif + return ((MI_SIZE_BITS - 1) - idx); } -static inline size_t mi_ctz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; +static inline size_t mi_ctz(size_t x) { + if (x==0) return MI_SIZE_BITS; unsigned long idx; -#if (INTPTR_MAX == LONG_MAX) - _BitScanForward(&idx, x); -#else - _BitScanForward64(&idx, x); -#endif + #if (SIZE_MAX == ULONG_MAX) + _BitScanForward(&idx, x); + #else + _BitScanForward64(&idx, x); + #endif return idx; } @@ -941,9 +941,9 @@ static inline size_t mi_clz(size_t x) { #endif -// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); +// "bit scan reverse": Return index of the highest bit (or MI_SIZE_BITS if `x` is zero) +static inline size_t mi_bsr(size_t x) { + return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x)); } diff --git a/src/bitmap.c b/src/bitmap.c index 98f6ab7b..2def7bb0 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -81,7 +81,7 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ // on to the next bit range #ifdef MI_HAVE_FAST_BITSCAN mi_assert_internal(mapm != 0); - const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx)); + const size_t shift = (count == 1 ? 1 : (MI_SIZE_BITS - mi_clz(mapm) - bitidx)); mi_assert_internal(shift > 0 && shift <= count); #else const size_t shift = 1; @@ -146,7 +146,7 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size return ((field & mask) == mask); } -// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. // Returns `true` if successful when all previous `count` bits were 0. bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); @@ -154,9 +154,9 @@ bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); - do { + do { if ((expected & mask) != 0) return false; - } + } while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); mi_assert_internal((expected & mask) == 0); return true; @@ -194,7 +194,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit if (initial == 0) return false; if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries - + // scan ahead size_t found = initial; size_t mask = 0; // mask bits for the final field diff --git a/src/heap.c b/src/heap.c index f856a426..58aa050c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -552,7 +552,7 @@ void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) { static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) { mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX); - *shift = MI_INTPTR_BITS - mi_clz(divisor - 1); + *shift = MI_SIZE_BITS - mi_clz(divisor - 1); *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1); } diff --git a/src/page-queue.c b/src/page-queue.c index e2728a1f..9fc5f924 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -57,7 +57,7 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. -static inline uint8_t mi_bin(size_t size) { +static inline size_t mi_bin(size_t size) { size_t wsize = _mi_wsize_from_size(size); #if defined(MI_ALIGN4W) if mi_likely(wsize <= 4) { @@ -81,7 +81,7 @@ static inline uint8_t mi_bin(size_t size) { #endif wsize--; // find the highest bit - const size_t b = mi_bsr(wsize); // note: wsize != 0 + const size_t b = (MI_SIZE_BITS - 1 - mi_clz(wsize)); // note: wsize != 0 // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin @@ -97,11 +97,11 @@ static inline uint8_t mi_bin(size_t size) { Queue of pages with free blocks ----------------------------------------------------------- */ -uint8_t _mi_bin(size_t size) { +size_t _mi_bin(size_t size) { return mi_bin(size); } -size_t _mi_bin_size(uint8_t bin) { +size_t _mi_bin_size(size_t bin) { return _mi_heap_empty.pages[bin].block_size; } @@ -138,7 +138,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { mi_assert_internal(heap!=NULL); - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); + size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; mi_assert_internal((mi_page_block_size(page) == pq->block_size) || @@ -180,7 +180,7 @@ static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_que } else { // find previous size; due to minimal alignment upto 3 previous bins may need to be skipped - uint8_t bin = mi_bin(size); + size_t bin = mi_bin(size); const mi_page_queue_t* prev = pq - 1; while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) { prev--; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 20f833bd..a080f4bc 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -123,7 +123,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; } // get virtual address bits if ((uintptr_t)si.lpMaximumApplicationAddress > 0) { - const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress); + const size_t vbits = MI_SIZE_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress); config->virtual_address_bits = vbits; } diff --git a/test/main-override-static.c b/test/main-override-static.c index 4a1f81dc..06d7baa5 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -33,7 +33,7 @@ int main() { mi_version(); mi_stats_reset(); - mi_bins(); + // mi_bins(); // test_manage_os_memory(); // test_large_pages(); @@ -315,7 +315,7 @@ static void test_large_pages(void) { // bin size experiments // ------------------------------ -#if 1 +#if 0 #include #include