diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index e1951cf7..3afac04d 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -36,6 +36,12 @@ terms of the MIT license. A copy of the license can be found in the file #error platform pointers must be 32, 64, or 128 bits #endif +#if (INTPTR_MAX) > LONG_MAX +# define MI_PU(x) x##ULL +#else +# define MI_PU(x) x##UL +#endif + #if SIZE_MAX == UINT64_MAX # define MI_SIZE_SHIFT (3) typedef int64_t mi_ssize_t; @@ -43,15 +49,13 @@ typedef int64_t mi_ssize_t; # define MI_SIZE_SHIFT (2) typedef int32_t mi_ssize_t; #else -#error platform objects must be 32 or 64 bits +#error platform objects must be 32 or 64 bits in size #endif #if (SIZE_MAX/2) > LONG_MAX # define MI_ZU(x) x##ULL -# define MI_ZI(x) x##LL #else # define MI_ZU(x) x##UL -# define MI_ZI(x) x##L #endif #define MI_INTPTR_SIZE (1< @@ -352,30 +272,15 @@ static inline size_t mi_rotr(size_t x, size_t r) { #endif } -static inline uint32_t mi_rotr32(uint32_t x, uint32_t r) { - #if mi_has_builtin(rotateright32) - return mi_builtin(rotateright32)(x, r); - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) - return _lrotr(x, (int)r); - #else - // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to - // avoid UB when `rshift==0`. 
See <https://blog.regehr.org/archives/1063> - const unsigned int rshift = (unsigned int)(r) & 31; - return ((x >> rshift) | (x << ((-rshift) & 31))); - #endif -} - static inline size_t mi_rotl(size_t x, size_t r) { #if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64) return mi_builtin(rotateleft64)(x,r); #elif (mi_has_builtin(rotateleft32) && MI_SIZE_BITS==32) return mi_builtin(rotateleft32)(x,r); - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) - #if MI_SIZE_BITS==32 - return _lrotl(x,(int)r); - #else - return _rotl64(x,(int)r); - #endif + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64) + return _rotl64(x, (int)r); + #elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32) + return _lrotl(x, (int)r); #else // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to // avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063> @@ -385,5 +290,4 @@ static inline size_t mi_rotl(size_t x, size_t r) { } - #endif // MI_BITS_H diff --git a/src/libc.c b/src/libc.c index 20e9e38b..3fdbf3e7 100644 --- a/src/libc.c +++ b/src/libc.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -277,10 +277,12 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) 
{ // -------------------------------------------------------- -// generic trailing and leading zero count +// generic trailing and leading zero count, and popcount // -------------------------------------------------------- -uint32_t _mi_ctz_generic32(uint32_t x) { +#if !MI_HAS_FAST_BITSCAN + +static size_t mi_ctz_generic32(uint32_t x) { // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf> static const uint8_t debruijn[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, @@ -319,10 +321,71 @@ size_t _mi_clz_generic(size_t x) { size_t _mi_ctz_generic(size_t x) { if (x==0) return MI_SIZE_BITS; #if (MI_SIZE_BITS <= 32) - return _mi_ctz_generic32((uint32_t)x); + return mi_ctz_generic32((uint32_t)x); #else - const size_t count = _mi_ctz_generic32((uint32_t)x); + const size_t count = mi_ctz_generic32((uint32_t)x); if (count < 32) return count; - return (32 + _mi_ctz_generic32((uint32_t)(x>>32))); + return (32 + mi_ctz_generic32((uint32_t)(x>>32))); #endif } + +#endif // bit scan + +#if !MI_HAS_FAST_POPCOUNT + +#if MI_SIZE_SIZE == 4 +#define mi_mask_even_bits32 (0x55555555) +#define mi_mask_even_pairs32 (0x33333333) +#define mi_mask_even_nibbles32 (0x0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! +static size_t mi_byte_sum32(uint32_t x) { + // perform `x * 0x01010101`: the highest byte contains the sum of all bytes. 
+ x += (x << 8); + x += (x << 16); + return (size_t)(x >> 24); +} + +static size_t mi_popcount_generic32(uint32_t x) { + // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10 + // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair + // into the lower bit-pair: + x = x - ((x >> 1) & mi_mask_even_bits32); + // add the 2-bit pair results + x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32); + // add the 4-bit nibble results + x = (x + (x >> 4)) & mi_mask_even_nibbles32; + // each byte now has a count of its bits, we can sum them now: + return mi_byte_sum32(x); +} + +size_t _mi_popcount_generic(size_t x) { + return mi_popcount_generic32(x); +} + +#else +#define mi_mask_even_bits64 (0x5555555555555555) +#define mi_mask_even_pairs64 (0x3333333333333333) +#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! +static size_t mi_byte_sum64(uint64_t x) { + x += (x << 8); + x += (x << 16); + x += (x << 32); + return (size_t)(x >> 56); +} + +static size_t mi_popcount_generic64(uint64_t x) { + x = x - ((x >> 1) & mi_mask_even_bits64); + x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64); + x = (x + (x >> 4)) & mi_mask_even_nibbles64; + return mi_byte_sum64(x); +} + +size_t _mi_popcount_generic(size_t x) { + return mi_popcount_generic64(x); +} +#endif + +#endif // popcount diff --git a/src/os.c b/src/os.c index b05068fd..0c020302 100644 --- a/src/os.c +++ b/src/os.c @@ -175,7 +175,7 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - mi_stat_counter_increase(stats->mmap_calls, 1); + _mi_stat_counter_increase(&stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); if (commit) {