improve popcount

This commit is contained in:
daanx 2024-12-10 14:50:55 -08:00
parent f37aff6ee2
commit 7cd8f31f30
2 changed files with 27 additions and 25 deletions

View file

@ -144,11 +144,31 @@ typedef int32_t mi_ssize_t;
#define mi_msc_builtinz(name) name##64
#endif
/* --------------------------------------------------------------------------------
Count trailing/leading zero's
Popcount and count trailing/leading zero's
-------------------------------------------------------------------------------- */
size_t _mi_popcount_generic(size_t x);
static inline size_t mi_popcount(size_t x) {
#if mi_has_builtinz(popcount)
return mi_builtinz(popcount)(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return mi_msc_builtinz(__popcnt)(x);
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_mm_popcnt_u64(x);
#else
#define MI_HAS_FAST_POPCOUNT 0
return (x<=1 ? x : _mi_popcount_generic(x));
#endif
}
#ifndef MI_HAS_FAST_POPCOUNT
#define MI_HAS_FAST_POPCOUNT 1
#endif
size_t _mi_clz_generic(size_t x);
size_t _mi_ctz_generic(size_t x);
@ -169,6 +189,8 @@ static inline size_t mi_ctz(size_t x) {
size_t r;
__asm ("bsf\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif MI_HAS_FAST_POPCOUNT
return (x!=0 ? (mi_popcount(x^(x-1))-1) : MI_SIZE_BITS);
#else
#define MI_HAS_FAST_BITSCAN 0
return (x!=0 ? _mi_ctz_generic(x) : MI_SIZE_BITS);
@ -179,7 +201,7 @@ static inline size_t mi_clz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0
size_t r;
__asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
return r;
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_lzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
@ -202,26 +224,6 @@ static inline size_t mi_clz(size_t x) {
#define MI_HAS_FAST_BITSCAN 1
#endif
size_t _mi_popcount_generic(size_t x);
static inline size_t mi_popcount(size_t x) {
#if mi_has_builtinz(popcount)
return mi_builtinz(popcount)(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return mi_msc_builtinz(__popcnt)(x);
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_mm_popcnt_u64(x);
#else
#define MI_HAS_FAST_POPCOUNT 0
return (x<=1 ? x : _mi_popcount_generic(x));
#endif
}
#ifndef MI_HAS_FAST_POPCOUNT
#define MI_HAS_FAST_POPCOUNT 1
#endif
/* --------------------------------------------------------------------------------
find trailing/leading zero (bit scan forward/reverse)
-------------------------------------------------------------------------------- */

View file

@ -283,7 +283,7 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
#if !MI_HAS_FAST_BITSCAN
static size_t mi_ctz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
// de Bruijn multiplication, see <http://keithandkatie.com/keith/papers/debruijn.html>
static const uint8_t debruijn[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
@ -293,7 +293,7 @@ static size_t mi_ctz_generic32(uint32_t x) {
}
static size_t mi_clz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
// de Bruijn multiplication, see <http://keithandkatie.com/keith/papers/debruijn.html>
static const uint8_t debruijn[32] = {
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0