diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index 1d4063bb..c40b32f6 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -13,6 +13,9 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MI_BITS_H #define MI_BITS_H +#include // size_t +#include // int64_t etc +#include // bool // ------------------------------------------------------ // Size of a pointer. @@ -90,7 +93,7 @@ typedef int32_t mi_ssize_t; #endif #endif -#if (MI_ARCH_X86 || MI_ARCH_X64) +#if MI_ARCH_X64 && defined(__AVX2__) #include #elif MI_ARCH_ARM64 && MI_OPT_SIMD #include @@ -120,37 +123,20 @@ typedef int32_t mi_ssize_t; #define MI_MAX_VABITS (32) #endif - // use a flat page-map (or a 2-level one) #ifndef MI_PAGE_MAP_FLAT -#if MI_MAX_VABITS <= 40 && !MI_SECURE && !defined(__APPLE__) +#if MI_MAX_VABITS <= 40 && !defined(__APPLE__) #define MI_PAGE_MAP_FLAT 1 #else #define MI_PAGE_MAP_FLAT 0 #endif #endif -#if MI_PAGE_MAP_FLAT && MI_SECURE -#error should not use MI_PAGE_MAP_FLAT with a secure build -#endif - /* -------------------------------------------------------------------------------- Builtin's -------------------------------------------------------------------------------- */ -#if defined(__GNUC__) || defined(__clang__) -#define mi_unlikely(x) (__builtin_expect(!!(x),false)) -#define mi_likely(x) (__builtin_expect(!!(x),true)) -#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) -#define mi_unlikely(x) (x) [[unlikely]] -#define mi_likely(x) (x) [[likely]] -#else -#define mi_unlikely(x) (x) -#define mi_likely(x) (x) -#endif - - #ifndef __has_builtin #define __has_builtin(x) 0 #endif @@ -188,28 +174,17 @@ typedef int32_t mi_ssize_t; -------------------------------------------------------------------------------- */ size_t _mi_popcount_generic(size_t x); -extern bool _mi_cpu_has_popcnt; static inline size_t mi_popcount(size_t x) { - #if defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86) - #if !defined(__BMI1__) - if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); } - #endif - size_t r; - __asm ("popcnt\t%1,%0" : "=r"(r) : "r"(x) : "cc"); - return r; - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86) - #if !defined(__BMI1__) - if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); } - #endif - return (size_t)mi_msc_builtinz(__popcnt)(x); - #elif defined(_MSC_VER) && MI_ARCH_ARM64 - return (size_t)mi_msc_builtinz(__popcnt)(x); - #elif mi_has_builtinz(popcount) + #if mi_has_builtinz(popcount) return mi_builtinz(popcount)(x); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + return mi_msc_builtinz(__popcnt)(x); + #elif MI_ARCH_X64 && defined(__BMI1__) + return (size_t)_mm_popcnt_u64(x); #else #define MI_HAS_FAST_POPCOUNT 0 - return _mi_popcount_generic(x); + return (x<=1 ? x : _mi_popcount_generic(x)); #endif } @@ -223,29 +198,19 @@ size_t _mi_clz_generic(size_t x); size_t _mi_ctz_generic(size_t x); static inline size_t mi_ctz(size_t x) { - #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) + #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0 size_t r; __asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); return r; - #elif defined(__GNUC__) && MI_ARCH_X64 - // tzcnt is interpreted as bsf if BMI1 is not supported (pre-haswell) - // if the argument is zero: - // - tzcnt: sets carry-flag, and returns MI_SIZE_BITS - // - bsf : sets zero-flag, and leaves the destination _unmodified_ (on both AMD and Intel now, see ) - // so we always initialize r to MI_SIZE_BITS to work correctly on all cpu's without branching - size_t r = MI_SIZE_BITS; - __asm ("tzcnt\t%1, %0" : "+r"(r) : "r"(x) : "cc"); // use '+r' to keep the assignment to r in case this becomes bsf on older cpu's - return r; + #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__) + return _tzcnt_u64(x); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + unsigned long idx; + return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS); #elif mi_has_builtinz(ctz) return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS); - #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__) - return (x!=0 ? _tzcnt_u64(x) : MI_SIZE_BITS); // ensure it still works on non-BMI1 cpu's as well - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) - if (x==0) return MI_SIZE_BITS; // test explicitly for `x==0` to avoid codegen bug (issue #1071) - unsigned long idx; mi_msc_builtinz(_BitScanForward)(&idx, x); - return (size_t)idx; - #elif defined(__GNUC__) && MI_ARCH_X86 - size_t r = MI_SIZE_BITS; + #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86) + size_t r = MI_SIZE_BITS; // bsf leaves destination unmodified if the argument is 0 (see ) __asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc"); return r; #elif MI_HAS_FAST_POPCOUNT @@ -257,18 +222,17 @@ static inline size_t mi_ctz(size_t x) { } static inline size_t mi_clz(size_t x) { - // we don't optimize anymore to lzcnt as there are still non BMI1 cpu's around (like Intel Celeron, see issue #1016) - // on pre-haswell cpu's lzcnt gets executed as bsr which is not equivalent (at it returns the bit position) #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0 size_t r; __asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); return r; + #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__) + return _lzcnt_u64(x); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + unsigned long idx; + return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS); #elif mi_has_builtinz(clz) return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS); - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) - if (x==0) return MI_SIZE_BITS; // test explicitly for `x==0` to avoid codegen bug (issue #1071) - unsigned long idx; mi_msc_builtinz(_BitScanReverse)(&idx, x); - return (MI_SIZE_BITS - 1 - (size_t)idx); #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86) if (x==0) return MI_SIZE_BITS; size_t r; @@ -297,11 +261,9 @@ static inline bool mi_bsf(size_t x, size_t* idx) { bool is_zero; __asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" ); return !is_zero; - #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) - if (x==0) return false; // test explicitly for `x==0` to avoid codegen bug (issue #1071) - unsigned long i; mi_msc_builtinz(_BitScanForward)(&i, x); - *idx = (size_t)i; - return true; + #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + unsigned long i; + return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false); #else return (x!=0 ? (*idx = mi_ctz(x), true) : false); #endif @@ -311,11 +273,14 @@ static inline bool mi_bsf(size_t x, size_t* idx) { // return false if `x==0` (with `*idx` undefined) and true otherwise, // with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). static inline bool mi_bsr(size_t x, size_t* idx) { - #if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) - if (x==0) return false; // test explicitly for `x==0` to avoid codegen bug (issue #1071) - unsigned long i; mi_msc_builtinz(_BitScanReverse)(&i, x); - *idx = (size_t)i; - return true; + #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9) + // on x64 the carry flag is set on zero which gives better codegen + bool is_zero; + __asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc"); + return !is_zero; + #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + unsigned long i; + return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false); #else return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false); #endif diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 71b2c93b..b4e0cae6 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -22,6 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file #include #include // ptrdiff_t #include // uintptr_t, uint16_t, etc +#include // bool #include // SIZE_MAX etc. #include // error codes #include "bits.h" // size defines (MI_INTPTR_SIZE etc), bit operations