diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 145f6f92..29f3749c 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -107,7 +107,6 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback fro size_t _mi_bin_size(uint8_t bin); // for stats uint8_t _mi_bin(size_t size); // for stats -uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD in "os.c" // "heap.c" void _mi_heap_destroy_pages(mi_heap_t* heap); @@ -744,5 +743,107 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { } #endif +// ----------------------------------------------------------------------- +// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) +// ----------------------------------------------------------------------- + +#if defined(__GNUC__) + +#include // LONG_MAX +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_clzl(x); +#else + return __builtin_clzll(x); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_ctzl(x); +#else + return __builtin_ctzll(x); +#endif +} + +#elif defined(_MSC_VER) + +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanReverse(&idx, x); +#else + _BitScanReverse64(&idx, x); +#endif + return ((MI_INTPTR_BITS - 1) - idx); +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanForward(&idx, x); +#else + _BitScanForward64(&idx, x); +#endif + return idx; +} + +#else +static inline size_t mi_ctz32(uint32_t x) { + // de Bruijn multiplication, see + static const unsigned char debruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + if (x==0) return 32; + return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27]; +} +static inline size_t mi_clz32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, + 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 + }; + if (x==0) return 32; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27]; +} + +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_clz32((uint32_t)x); +#else + size_t count = mi_clz32((uint32_t)(x >> 32)); + if (count < 32) return count; + return (32 + mi_clz32((uint32_t)x)); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_ctz32((uint32_t)x); +#else + size_t count = mi_ctz32((uint32_t)x); + if (count < 32) return count; + return (32 + mi_ctz32((uint32_t)(x>>32))); +#endif +} + +#endif + +// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); +} + #endif diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 739ab2b6..9d7184e7 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -68,47 +68,6 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { } -/* ----------------------------------------------------------- - Use bit scan forward/reverse to quickly find the first zero bit if it is available ------------------------------------------------------------ */ -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -#ifndef MI_64 -#if MI_INTPTR_SIZE==8 -#define MI_64(f) f##64 -#else -#define MI_64(f) f -#endif -#endif - -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanForward)(&idx, x); - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanReverse)(&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#include // LONG_MAX -#define MI_HAVE_BITSCAN -#if (INTPTR_MAX == LONG_MAX) -# define MI_L(x) x##l -#else -# define MI_L(x) x##ll -#endif -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); -} -#endif /* ----------------------------------------------------------- Claim a bit sequence atomically @@ -148,8 +107,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx const uintptr_t mask = mi_bitmap_mask_(count, 0); const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; -#ifdef MI_HAVE_BITSCAN - size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible +#ifdef MI_HAVE_FAST_BITSCAN + size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible #else size_t bitidx = 0; // otherwise start at 0 #endif @@ -157,7 +116,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx // scan linearly for a free range of zero bits while (bitidx <= bitidx_max) { - if ((map & m) == 0) { // are the mask bits free at bitidx? + const uintptr_t mapm = map & m; + if (mapm == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? const uintptr_t newmap = map | m; mi_assert_internal((newmap^map) >> bitidx == mask); @@ -173,8 +133,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx } else { // on to the next bit range -#ifdef MI_HAVE_BITSCAN - const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); +#ifdef MI_HAVE_FAST_BITSCAN + const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1); mi_assert_internal(shift > 0 && shift <= count); #else const size_t shift = 1; @@ -270,8 +230,8 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz // check initial trailing zeros _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_load_relaxed(field); - const uintptr_t bitidx = (map==0 ? 0 : mi_bsr(map) + 1); - const size_t initial = MI_BITMAP_FIELD_BITS - bitidx; // count of initial zeros starting at idx + const size_t initial = mi_clz(map); // count of initial zeros starting at idx + mi_assert_internal(initial >= 0 && initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; if (initial >= count) return mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries @@ -321,7 +281,7 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); // claimed! - *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); return true; rollback: diff --git a/src/os.c b/src/os.c index 437b8f1a..e8d6b74a 100644 --- a/src/os.c +++ b/src/os.c @@ -358,7 +358,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro int fd = -1; #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { - size_t n = _mi_bsr(try_alignment); + size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); } diff --git a/src/page-queue.c b/src/page-queue.c index 37719e02..57e3d6a5 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -49,50 +49,6 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { Bins ----------------------------------------------------------- */ -// Bit scan reverse: return the index of the highest bit. -static inline uint8_t mi_bsr32(uint32_t x); - -#if defined(_MSC_VER) -#include -static inline uint8_t mi_bsr32(uint32_t x) { - uint32_t idx; - _BitScanReverse((DWORD*)&idx, x); - return (uint8_t)idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline uint8_t mi_bsr32(uint32_t x) { - return (31 - __builtin_clz(x)); -} -#else -static inline uint8_t mi_bsr32(uint32_t x) { - // de Bruijn multiplication, see - static const uint8_t debruijn[32] = { - 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, - 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, - }; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return debruijn[(x*0x076be629) >> 27]; -} -#endif - -// Bit scan reverse: return the index of the highest bit. -uint8_t _mi_bsr(uintptr_t x) { - if (x == 0) return 0; -#if MI_INTPTR_SIZE==8 - uint32_t hi = (x >> 32); - return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); -#elif MI_INTPTR_SIZE==4 - return mi_bsr32(x); -#else -# error "define bsr for non-32 or 64-bit platforms" -#endif -} - // Return the bin for a given field size. // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", @@ -125,7 +81,7 @@ extern inline uint8_t _mi_bin(size_t size) { #endif wsize--; // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); + uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0 // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin