mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-03 14:09:31 +03:00
update popcnt to be more efficient on x64 even without MI_OPT_ARCH=ON
This commit is contained in:
parent
7e721c881b
commit
d9580f3bfb
5 changed files with 68 additions and 39 deletions
|
@ -316,7 +316,7 @@
|
|||
<CompileAs>CompileAsCpp</CompileAs>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<LanguageStandard>stdcpp20</LanguageStandard>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
|
||||
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
|
|
@ -90,7 +90,7 @@ typedef int32_t mi_ssize_t;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if MI_ARCH_X64 && defined(__AVX2__)
|
||||
#if (MI_ARCH_X86 || MI_ARCH_X64)
|
||||
#include <immintrin.h>
|
||||
#elif MI_ARCH_ARM64 && MI_OPT_SIMD
|
||||
#include <arm_neon.h>
|
||||
|
@ -134,6 +134,18 @@ typedef int32_t mi_ssize_t;
|
|||
Builtin's
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define mi_unlikely(x) (__builtin_expect(!!(x),false))
|
||||
#define mi_likely(x) (__builtin_expect(!!(x),true))
|
||||
#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
|
||||
#define mi_unlikely(x) (x) [[unlikely]]
|
||||
#define mi_likely(x) (x) [[likely]]
|
||||
#else
|
||||
#define mi_unlikely(x) (x)
|
||||
#define mi_likely(x) (x)
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
@ -171,14 +183,25 @@ typedef int32_t mi_ssize_t;
|
|||
-------------------------------------------------------------------------------- */
|
||||
|
||||
size_t _mi_popcount_generic(size_t x);
|
||||
extern bool _mi_cpu_has_popcnt;
|
||||
|
||||
static inline size_t mi_popcount(size_t x) {
|
||||
#if mi_has_builtinz(popcount)
|
||||
#if defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
|
||||
#if !defined(__BMI1__)
|
||||
if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
|
||||
#endif
|
||||
size_t r;
|
||||
__asm ("popcnt\t%1,%0" : "=r"(r) : "r"(x) : "cc");
|
||||
return r;
|
||||
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86)
|
||||
#if !defined(__BMI1__)
|
||||
if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
|
||||
#endif
|
||||
return (size_t)mi_msc_builtinz(__popcnt)(x);
|
||||
#elif defined(_MSC_VER) && MI_ARCH_ARM64
|
||||
return (size_t)mi_msc_builtinz(__popcnt)(x);
|
||||
#elif mi_has_builtinz(popcount)
|
||||
return mi_builtinz(popcount)(x);
|
||||
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
|
||||
return mi_msc_builtinz(__popcnt)(x);
|
||||
#elif MI_ARCH_X64 && defined(__BMI1__)
|
||||
return (size_t)_mm_popcnt_u64(x);
|
||||
#else
|
||||
#define MI_HAS_FAST_POPCOUNT 0
|
||||
return (x<=1 ? x : _mi_popcount_generic(x));
|
||||
|
|
|
@ -256,25 +256,6 @@ bool _mi_page_is_valid(mi_page_t* page);
|
|||
#endif
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Branches
|
||||
// ------------------------------------------------------
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define mi_unlikely(x) (__builtin_expect(!!(x),false))
|
||||
#define mi_likely(x) (__builtin_expect(!!(x),true))
|
||||
#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
|
||||
#define mi_unlikely(x) (x) [[unlikely]]
|
||||
#define mi_likely(x) (x) [[likely]]
|
||||
#else
|
||||
#define mi_unlikely(x) (x)
|
||||
#define mi_likely(x) (x)
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Assertions
|
||||
|
@ -1037,10 +1018,10 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
|
|||
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
|
||||
#include <intrin.h>
|
||||
#if !MI_TRACK_ENABLED && defined(_WIN32) && (MI_ARCH_X64 || MI_ARCH_X86)
|
||||
extern bool _mi_cpu_has_fsrm;
|
||||
extern bool _mi_cpu_has_erms;
|
||||
|
||||
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
|
||||
if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) {
|
||||
__movsb((unsigned char*)dst, (const unsigned char*)src, n);
|
||||
|
|
41
src/init.c
41
src/init.c
|
@ -652,25 +652,52 @@ void _mi_process_load(void) {
|
|||
_mi_random_reinit_if_weak(&heap_main.random);
|
||||
}
|
||||
|
||||
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
|
||||
#include <intrin.h>
|
||||
// CPU features
|
||||
mi_decl_cache_align bool _mi_cpu_has_fsrm = false;
|
||||
mi_decl_cache_align bool _mi_cpu_has_erms = false;
|
||||
mi_decl_cache_align bool _mi_cpu_has_popcnt = false;
|
||||
|
||||
#if (MI_ARCH_X64 || MI_ARCH_X86)
|
||||
#if defined(__GNUC__)
|
||||
#include <cpuid.h>
|
||||
static bool mi_cpuid(uint32_t* regs4, uint32_t level) {
|
||||
return (__get_cpuid(level, ®s4[0], ®s4[1], ®s4[2], ®s4[3]) == 1);
|
||||
}
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
static bool mi_cpuid(uint32_t* regs4, uint32_t level) {
|
||||
__cpuid((int32_t*)regs4, (int32_t)level);
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
static bool mi_cpuid(uint32_t* regs4, uint32_t level) {
|
||||
MI_UNUSED(regs4); MI_UNUSED(level);
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void mi_detect_cpu_features(void) {
|
||||
// FSRM for fast short rep movsb/stosb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017))
|
||||
// EMRS for fast enhanced rep movsb/stosb support
|
||||
int32_t cpu_info[4];
|
||||
__cpuid(cpu_info, 7);
|
||||
_mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
|
||||
_mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
|
||||
uint32_t cpu_info[4];
|
||||
if (mi_cpuid(cpu_info, 7)) {
|
||||
_mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
|
||||
_mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
|
||||
}
|
||||
if (mi_cpuid(cpu_info, 1)) {
|
||||
_mi_cpu_has_popcnt = ((cpu_info[2] & (1 << 23)) != 0); // bit 23 of ECX : see <https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits>
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
static void mi_detect_cpu_features(void) {
|
||||
// nothing
|
||||
#if MI_ARCH_ARM64
|
||||
_mi_cpu_has_popcnt = true;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// Initialize the process; called by thread_init or the process loader
|
||||
void mi_process_init(void) mi_attr_noexcept {
|
||||
// ensure we are called once
|
||||
|
|
|
@ -355,7 +355,6 @@ size_t _mi_clz_generic(size_t x) {
|
|||
|
||||
#endif // bit scan
|
||||
|
||||
#if !MI_HAS_FAST_POPCOUNT
|
||||
|
||||
#if MI_SIZE_SIZE == 4
|
||||
#define mi_mask_even_bits32 (0x55555555)
|
||||
|
@ -383,7 +382,7 @@ static size_t mi_popcount_generic32(uint32_t x) {
|
|||
return mi_byte_sum32(x);
|
||||
}
|
||||
|
||||
size_t _mi_popcount_generic(size_t x) {
|
||||
mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
|
||||
return mi_popcount_generic32(x);
|
||||
}
|
||||
|
||||
|
@ -407,9 +406,8 @@ static size_t mi_popcount_generic64(uint64_t x) {
|
|||
return mi_byte_sum64(x);
|
||||
}
|
||||
|
||||
size_t _mi_popcount_generic(size_t x) {
|
||||
mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
|
||||
return mi_popcount_generic64(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // popcount
|
||||
|
|
Loading…
Add table
Reference in a new issue