Merge branch 'dev' into dev-slice

This commit is contained in:
Daan 2024-11-25 19:29:49 -08:00
commit 577246d9ed
3 changed files with 9 additions and 5 deletions

View file

@ -1022,8 +1022,9 @@ static inline size_t mi_bsr(uintptr_t x) {
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
extern bool _mi_cpu_has_fsrm;
extern bool _mi_cpu_has_erms;
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
if (_mi_cpu_has_fsrm) {
if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) {
__movsb((unsigned char*)dst, (const unsigned char*)src, n);
}
else {
@ -1031,7 +1032,7 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
}
}
static inline void _mi_memzero(void* dst, size_t n) {
if (_mi_cpu_has_fsrm) {
if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) {
__stosb((unsigned char*)dst, 0, n);
}
else {

View file

@ -138,9 +138,9 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// but unfortunately we can not detect support reliably (see issue #883)
// We also use it on Apple OS as we use a TLS slot for the default heap there.
#if defined(__GNUC__) && ( \
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \
|| (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
|| (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
)

View file

@ -614,12 +614,15 @@ void _mi_process_load(void) {
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
mi_decl_cache_align bool _mi_cpu_has_fsrm = false;
mi_decl_cache_align bool _mi_cpu_has_erms = false;
static void mi_detect_cpu_features(void) {
// FSRM for fast rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017))
// FSRM for fast short rep movsb/stosb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017))
// EMRS for fast enhanced rep movsb/stosb support
int32_t cpu_info[4];
__cpuid(cpu_info, 7);
_mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
_mi_cpu_has_erms = ((cpu_info[2] & (1 << 9)) != 0); // bit 9 of ECX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
}
#else
static void mi_detect_cpu_features(void) {