From ab3dac04c2478a484b0e063b04fd0f19cc0f368d Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Wed, 30 Dec 2020 21:36:41 +0100 Subject: [PATCH 1/6] Use tpidrro_el0 for thread local storage in macOS-arm64 Fixes #343 --- include/mimalloc-internal.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e3e78e40..df700d39 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -707,7 +707,11 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { res = tcb[slot]; #elif defined(__aarch64__) void** tcb; UNUSED(ofs); +#if defined(__MACH__) + __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); +#else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); +#endif res = tcb[slot]; #endif return res; @@ -730,7 +734,11 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { tcb[slot] = value; #elif defined(__aarch64__) void** tcb; UNUSED(ofs); +#if defined(__MACH__) + __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); +#else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); +#endif tcb[slot] = value; #endif } From 88330cfc9fdd4ec5aaa7988efdd209a593bb0026 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Fri, 22 Jan 2021 17:06:43 +0100 Subject: [PATCH 2/6] Use __APPLE__ instead of __MACH__ --- include/mimalloc-internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index df700d39..dca21bb9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -707,7 +707,7 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { res = tcb[slot]; #elif defined(__aarch64__) void** tcb; UNUSED(ofs); -#if defined(__MACH__) +#if defined(__APPLE__) __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); #else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); From fb66ebea1d018130b4f0b228f0b9f2f651cca7ae Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" <1330696+mr-c@users.noreply.github.com> Date: Sat, 23 Jan 2021 16:45:47 +0100 Subject: [PATCH 3/6] add/improve atomic yields for SSE2, ARM*, PowerPC --- include/mimalloc-atomic.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 9f464593..a925a7f1 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -281,16 +281,33 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { YieldProcessor(); } +#elif defined(__SSE2__) +#include <emmintrin.h> +static inline void mi_atomic_yield(void) { + _mm_pause(); +} #elif (defined(__GNUC__) || defined(__clang__)) && \ - (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH__ >= 7) || defined(__aarch64__)) + (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); } -#elif (defined(__arm__) && __ARM_ARCH__ >= 7) || defined(__aarch64__) +#elif defined(__aarch64__) +static inline void mi_atomic_yield(void) { + asm volatile("wfe"); +} +#elif (defined(__arm__) && __ARM_ARCH__ >= 7) static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +static inline void mi_atomic_yield(void) { + __asm__ __volatile__ ("or 27,27,27" ::: "memory"); +} +#elif defined(__armel__) || defined(__ARMEL__) +static inline void mi_atomic_yield(void) { + asm volatile ("nop" ::: "memory"); +} #endif #elif defined(__sun) // Fallback for other archs From a753084f748d59dcd18fdc4f10c3e36ccfc107f5 Mon Sep 17 00:00:00 2001 From: "Uwe L. 
Korn" Date: Thu, 28 Jan 2021 11:38:38 +0100 Subject: [PATCH 4/6] Use APPLE instead of MACH --- include/mimalloc-internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index dca21bb9..e1190c7e 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -734,7 +734,7 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { tcb[slot] = value; #elif defined(__aarch64__) void** tcb; UNUSED(ofs); -#if defined(__MACH__) +#if defined(__APPLE__) __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); #else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); From 8d4444ef00c8aa3c073ea9a744659a74db75980a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 28 Jan 2021 17:36:35 -0800 Subject: [PATCH 5/6] remove spurious parenthesis (#350) --- include/mimalloc-atomic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index a925a7f1..2d725a25 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -287,7 +287,8 @@ static inline void mi_atomic_yield(void) { _mm_pause(); } #elif (defined(__GNUC__) || defined(__clang__)) && \ - (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); From 78ce716e2d5b1572e9b459cba12830e88c892989 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 28 Jan 2021 17:36:56 -0800 Subject: [PATCH 6/6] add comment on use of tpidrro_el0 on macOS --- include/mimalloc-internal.h | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e1190c7e..6a239f1a 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -707,7 +707,7 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { res = tcb[slot]; #elif defined(__aarch64__) void** tcb; UNUSED(ofs); -#if defined(__APPLE__) +#if defined(__APPLE__) // issue #343 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); #else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); @@ -734,7 +734,7 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { tcb[slot] = value; #elif defined(__aarch64__) void** tcb; UNUSED(ofs); -#if defined(__APPLE__) +#if defined(__APPLE__) // issue #343 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); #else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));