mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-06 23:39:31 +03:00
improve popcount
This commit is contained in:
parent
f37aff6ee2
commit
7cd8f31f30
2 changed files with 27 additions and 25 deletions
|
@ -144,11 +144,31 @@ typedef int32_t mi_ssize_t;
|
|||
#define mi_msc_builtinz(name) name##64
|
||||
#endif
|
||||
|
||||
|
||||
/* --------------------------------------------------------------------------------
|
||||
Count trailing/leading zero's
|
||||
Popcount and count trailing/leading zero's
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
size_t _mi_popcount_generic(size_t x);
|
||||
|
||||
// Return the number of bits that are set in `x`.
// The implementation is picked at compile time, in this order:
//  1. the compiler builtin selected through `mi_builtinz(popcount)`,
//  2. the MSVC `__popcnt` intrinsic selected through `mi_msc_builtinz`,
//  3. `_mm_popcnt_u64` on x64 when `__BMI1__` is defined,
//  4. the out-of-line `_mi_popcount_generic` fallback.
static inline size_t mi_popcount(size_t x) {
|
||||
#if mi_has_builtinz(popcount)
|
||||
return mi_builtinz(popcount)(x);
|
||||
// NOTE(review): confirm that `__popcnt` is available with MSVC on the ARM targets listed here.
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
||||
return mi_msc_builtinz(__popcnt)(x);
|
||||
// NOTE(review): `_mm_popcnt_u64` is guarded by `__BMI1__` here; confirm that this macro
// being defined implies the popcount instruction is available on the build targets.
#elif MI_ARCH_X64 && defined(__BMI1__)
|
||||
return (size_t)_mm_popcnt_u64(x);
|
||||
#else
|
||||
// No intrinsic was selected. A `#define` is not scoped to the function body, so this
// marks popcount as "not fast" for all code that follows in the translation unit.
#define MI_HAS_FAST_POPCOUNT 0
|
||||
// For x equal to 0 or 1 the bit count is x itself, so the generic routine is skipped.
return (x<=1 ? x : _mi_popcount_generic(x));
|
||||
#endif
|
||||
}
|
||||
|
||||
// If the fallback branch above did not define the macro, one of the intrinsic
// branches was compiled, so popcount is flagged as fast.
#ifndef MI_HAS_FAST_POPCOUNT
|
||||
#define MI_HAS_FAST_POPCOUNT 1
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
size_t _mi_clz_generic(size_t x);
|
||||
size_t _mi_ctz_generic(size_t x);
|
||||
|
||||
|
@ -169,6 +189,8 @@ static inline size_t mi_ctz(size_t x) {
|
|||
size_t r;
|
||||
__asm ("bsf\t%1, %0" : "=r"(r) : "r"(x) : "cc");
|
||||
return r;
|
||||
#elif MI_HAS_FAST_POPCOUNT
|
||||
return (x!=0 ? (mi_popcount(x^(x-1))-1) : MI_SIZE_BITS);
|
||||
#else
|
||||
#define MI_HAS_FAST_BITSCAN 0
|
||||
return (x!=0 ? _mi_ctz_generic(x) : MI_SIZE_BITS);
|
||||
|
@ -179,7 +201,7 @@ static inline size_t mi_clz(size_t x) {
|
|||
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0
|
||||
size_t r;
|
||||
__asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
|
||||
return r;
|
||||
return r;
|
||||
#elif MI_ARCH_X64 && defined(__BMI1__)
|
||||
return (size_t)_lzcnt_u64(x);
|
||||
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
||||
|
@ -202,26 +224,6 @@ static inline size_t mi_clz(size_t x) {
|
|||
#define MI_HAS_FAST_BITSCAN 1
|
||||
#endif
|
||||
|
||||
size_t _mi_popcount_generic(size_t x);
|
||||
|
||||
// Return the number of bits that are set in `x`, using the first available of:
// the `mi_builtinz(popcount)` compiler builtin, the MSVC `__popcnt` intrinsic,
// `_mm_popcnt_u64` (x64 with `__BMI1__` defined), or `_mi_popcount_generic`.
static inline size_t mi_popcount(size_t x) {
|
||||
#if mi_has_builtinz(popcount)
|
||||
return mi_builtinz(popcount)(x);
|
||||
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
||||
return mi_msc_builtinz(__popcnt)(x);
|
||||
#elif MI_ARCH_X64 && defined(__BMI1__)
|
||||
return (size_t)_mm_popcnt_u64(x);
|
||||
#else
|
||||
// Fallback path: the macro defined here stays visible after the function body,
// because preprocessor definitions are not scoped to a function.
#define MI_HAS_FAST_POPCOUNT 0
|
||||
// The bit count of 0 is 0 and of 1 is 1, so those inputs are returned directly.
return (x<=1 ? x : _mi_popcount_generic(x));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Reached without the fallback having defined the macro: an intrinsic branch was used.
#ifndef MI_HAS_FAST_POPCOUNT
|
||||
#define MI_HAS_FAST_POPCOUNT 1
|
||||
#endif
|
||||
|
||||
|
||||
/* --------------------------------------------------------------------------------
|
||||
find trailing/leading zero (bit scan forward/reverse)
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
|
|
@ -283,7 +283,7 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
|
|||
#if !MI_HAS_FAST_BITSCAN
|
||||
|
||||
static size_t mi_ctz_generic32(uint32_t x) {
|
||||
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
|
||||
// de Bruijn multiplication, see <http://keithandkatie.com/keith/papers/debruijn.html>
|
||||
static const uint8_t debruijn[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
|
||||
|
@ -293,7 +293,7 @@ static size_t mi_ctz_generic32(uint32_t x) {
|
|||
}
|
||||
|
||||
static size_t mi_clz_generic32(uint32_t x) {
|
||||
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
|
||||
// de Bruijn multiplication, see <http://keithandkatie.com/keith/papers/debruijn.html>
|
||||
static const uint8_t debruijn[32] = {
|
||||
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
|
||||
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
|
||||
|
|
Loading…
Add table
Reference in a new issue