diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index 4f0dce71..c0405d6f 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -144,11 +144,31 @@ typedef int32_t mi_ssize_t; #define mi_msc_builtinz(name) name##64 #endif - /* -------------------------------------------------------------------------------- - Count trailing/leading zero's + Popcount and count trailing/leading zeros -------------------------------------------------------------------------------- */ +size_t _mi_popcount_generic(size_t x); /* fallback called by mi_popcount when no intrinsic branch is selected; only declared here */ + +static inline size_t mi_popcount(size_t x) { /* population count of x: uses a compiler builtin or intrinsic when one of the branches below applies, else the generic fallback */ + #if mi_has_builtinz(popcount) + return mi_builtinz(popcount)(x); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + return mi_msc_builtinz(__popcnt)(x); + #elif MI_ARCH_X64 && defined(__BMI1__) + return (size_t)_mm_popcnt_u64(x); + #else + #define MI_HAS_FAST_POPCOUNT 0 /* only reached when none of the intrinsic branches above was selected */ + return (x<=1 ? x : _mi_popcount_generic(x)); /* for x equal to 0 or 1 the bit count equals x, so the generic call is skipped */ + #endif +} + +#ifndef MI_HAS_FAST_POPCOUNT /* still undefined here means an intrinsic branch was compiled; this must be settled before mi_ctz, which tests the macro */ +#define MI_HAS_FAST_POPCOUNT 1 +#endif + + + size_t _mi_clz_generic(size_t x); size_t _mi_ctz_generic(size_t x); @@ -169,6 +189,8 @@ static inline size_t mi_ctz(size_t x) { size_t r; __asm ("bsf\t%1, %0" : "=r"(r) : "r"(x) : "cc"); return r; + #elif MI_HAS_FAST_POPCOUNT /* earlier branches did not apply: derive the trailing-zero count from popcount */ + return (x!=0 ? (mi_popcount(x^(x-1))-1) : MI_SIZE_BITS); /* x^(x-1) sets the lowest set bit of x and every bit below it, so its popcount is ctz(x)+1; x==0 yields MI_SIZE_BITS */ #else #define MI_HAS_FAST_BITSCAN 0 return (x!=0 ?
_mi_ctz_generic(x) : MI_SIZE_BITS); @@ -179,7 +201,7 @@ static inline size_t mi_clz(size_t x) { #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0 size_t r; __asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); - return r; + return r; #elif MI_ARCH_X64 && defined(__BMI1__) return (size_t)_lzcnt_u64(x); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) @@ -202,26 +224,6 @@ static inline size_t mi_clz(size_t x) { #define MI_HAS_FAST_BITSCAN 1 #endif -size_t _mi_popcount_generic(size_t x); - -static inline size_t mi_popcount(size_t x) { - #if mi_has_builtinz(popcount) - return mi_builtinz(popcount)(x); - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) - return mi_msc_builtinz(__popcnt)(x); - #elif MI_ARCH_X64 && defined(__BMI1__) - return (size_t)_mm_popcnt_u64(x); - #else - #define MI_HAS_FAST_POPCOUNT 0 - return (x<=1 ? x : _mi_popcount_generic(x)); - #endif -} - -#ifndef MI_HAS_FAST_POPCOUNT -#define MI_HAS_FAST_POPCOUNT 1 -#endif - - /* -------------------------------------------------------------------------------- find trailing/leading zero (bit scan forward/reverse) -------------------------------------------------------------------------------- */ diff --git a/src/libc.c b/src/libc.c index 15d4d2a7..eed63d87 100644 --- a/src/libc.c +++ b/src/libc.c @@ -283,7 +283,7 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) 
{ #if !MI_HAS_FAST_BITSCAN static size_t mi_ctz_generic32(uint32_t x) { - // de Bruijn multiplication, see + // de Bruijn multiplication, see Leiserson, Prokop & Randall, "Using de Bruijn Sequences to Index a 1 in a Computer Word" (1998) static const uint8_t debruijn[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 @@ -293,7 +293,7 @@ static size_t mi_ctz_generic32(uint32_t x) { } static size_t mi_clz_generic32(uint32_t x) { - // de Bruijn multiplication, see + // de Bruijn multiplication, see Leiserson, Prokop & Randall, "Using de Bruijn Sequences to Index a 1 in a Computer Word" (1998) static const uint8_t debruijn[32] = { 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0