From ef8e5d18a65f653bbef9cf57694aff37d2e85b9d Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jul 2020 18:00:38 -0700 Subject: [PATCH] replace atomics with C11/C++ atomics with explicit memory order; passes tsan. Issue #130 --- include/mimalloc-atomic.h | 319 ++++++++++++++---------------------- include/mimalloc-internal.h | 8 +- include/mimalloc-types.h | 4 +- src/alloc.c | 14 +- src/arena.c | 18 +- src/bitmap.inc.c | 14 +- src/heap.c | 2 +- src/options.c | 18 +- src/os.c | 26 +-- src/page-queue.c | 6 +- src/page.c | 18 +- src/random.c | 4 +- src/region.c | 50 +++--- src/segment.c | 62 ++++--- 14 files changed, 248 insertions(+), 315 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index cb247b09..e1fdda16 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018, Microsoft Research, Daan Leijen +Copyright (c) 2018,2020 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,150 +8,97 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_ATOMIC_H #define MIMALLOC_ATOMIC_H -// ------------------------------------------------------ +// -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. -// ------------------------------------------------------ +// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// This is why we try to use only `uintptr_t` and `*` as atomic types. +// To gain better insight in the range of used atomics, we use explicitly named memory order operations +// instead of passing the memory order as a parameter. +// ----------------------------------------------------------------------------------------------- #if defined(__cplusplus) +// Use C++ atomics #include -#define _Atomic(tp) std::atomic +#define _Atomic(tp) std::atomic +#define mi_atomic(name) std::atomic_##name +#define mi_memory_order(name) std::memory_order_##name #elif defined(_MSC_VER) +// Use MSVC C wrapper for C11 atomics #define _Atomic(tp) tp #define ATOMIC_VAR_INIT(x) x +#define mi_atomic(name) mi_atomic_##name +#define mi_memory_order(name) mi_memory_order_##name #else +// Use C11 atomics #include +#define mi_atomic(name) atomic_##name +#define mi_memory_order(name) memory_order_##name #endif -// ------------------------------------------------------ -// Atomic operations specialized for mimalloc -// ------------------------------------------------------ +// Various defines for all used memory orders in mimalloc +#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail) -// Atomically add a value; returns the previous value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add); +#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail) -// Atomically "and" a value; returns the previous value. Memory ordering is release. 
-static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x); +#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) +#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) -// Atomically "or" a value; returns the previous value. Memory ordering is release. -static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x); +#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel)) -// Atomically compare and exchange a value; returns `true` if successful. -// May fail spuriously. Memory ordering is release; with relaxed on failure. -static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired); +#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,1) +#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,1) +#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,1) +#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,1) -// Atomically compare and exchange a value; returns `true` if successful. -// May fail spuriously. Memory ordering is acquire-release; with acquire on failure. -static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired); - -// Atomically compare and exchange a value; returns `true` if successful. -// Memory ordering is acquire-release; with acquire on failure. -static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired); - -// Atomically exchange a value. Memory ordering is acquire-release. -static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange); - -// Atomically read a value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_read_relaxed(const _Atomic(uintptr_t)* p); - -// Atomically read a value. Memory ordering is acquire. -static inline uintptr_t mi_atomic_read(const _Atomic(uintptr_t)* p); - -// Atomically write a value. Memory ordering is release. 
-static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x); - -// Yield static inline void mi_atomic_yield(void); - -// Atomically add a 64-bit value; returns the previous value. Memory ordering is relaxed. -// Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add); - -// Atomically update `*p` with the maximum of `*p` and `x` as a 64-bit value. -// Returns the previous value. Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x); +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add); +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub); -// Atomically subtract a value; returns the previous value. -static inline uintptr_t mi_atomic_sub(_Atomic(uintptr_t)* p, uintptr_t sub) { - return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub))); +#if defined(__cplusplus) || !defined(_MSC_VER) + +// In C++/C11 atomics we have polymorpic atomics so can use the typed `ptr` variants +// (where `tp` is the type of atomic value) +// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well +#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p) +#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { + return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); +} +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { + int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); + while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ }; } -// Atomically increment a value; returns the incremented result. -static inline uintptr_t mi_atomic_increment(_Atomic(uintptr_t)* p) { - return mi_atomic_add(p, 1); -} -// Atomically decrement a value; returns the decremented result. -static inline uintptr_t mi_atomic_decrement(_Atomic(uintptr_t)* p) { - return mi_atomic_sub(p, 1); -} +#elif defined(_MSC_VER) -// Atomically add a signed value; returns the previous value. -static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)mi_atomic_add((_Atomic(uintptr_t)*)p, (uintptr_t)add); -} - -// Atomically subtract a signed value; returns the previous value. -static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub) { - return (intptr_t)mi_atomic_addi(p,-sub); -} - -// Atomically compare and exchange a void pointer; returns `true` if successful. May fail spuriously. -// Memory order is release. 
(like a write) -static inline bool mi_atomic_cas_weak_voidp(_Atomic(void*)* p, void** expected, void* desired, void* unused1, void* unused2) { - (void)unused1; (void)unused2; // for extra type check - return mi_atomic_cas_weak((_Atomic(uintptr_t)*)p, (uintptr_t*)expected, (uintptr_t)desired); -} - -// Atomically read a void pointer; Memory order is relaxed (i.e. no fence, only atomic). -static inline void* mi_atomic_read_voidp(const _Atomic(void*)* p, void* unused) { - (void)unused; // for extra type check - return (void*)mi_atomic_read((const _Atomic(uintptr_t)*) p); -} - -// Atomically read a void pointer; Memory order is acquire. -static inline void* mi_atomic_read_voidp_relaxed(const _Atomic(void*)*p, void* unused) { - (void)unused; // for extra type check - return (void*)mi_atomic_read_relaxed((const _Atomic(uintptr_t)*) p); -} - -// Atomically write a void pointer; Memory order is acquire. -static inline void mi_atomic_write_voidp(_Atomic(void*)* p, void* exchange, void* unused) { - (void)unused; // for extra type check - mi_atomic_write((_Atomic(uintptr_t)*) p, (uintptr_t)exchange); -} - -// Atomically exchange a void pointer; Memory order is release-acquire. -static inline void* mi_atomic_exchange_voidp(_Atomic(void*)*p, void* exchange, void* unused) { - (void)unused; // for extra type check - return (void*)mi_atomic_exchange((_Atomic(uintptr_t)*) p, (uintptr_t)exchange); -} - -// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. -// Memory order is release. (like a write) -#define mi_atomic_cas_ptr_weak(T,p,expected,desired) \ - mi_atomic_cas_weak_voidp((_Atomic(void*)*)(p), (void**)(expected), desired, *(p), *(expected)) - -// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). -#define mi_atomic_read_ptr_relaxed(T,p) \ - (T*)(mi_atomic_read_voidp_relaxed((const _Atomic(void*)*)(p), *(p))) - -// Atomically read a pointer; Memory order is acquire. -#define mi_atomic_read_ptr(T,p) \ - (T*)(mi_atomic_read_voidp((const _Atomic(void*)*)(p), *(p))) - -// Atomically write a pointer; Memory order is acquire. -#define mi_atomic_write_ptr(T,p,x) \ - mi_atomic_write_voidp((_Atomic(void*)*)(p), x, *(p)) - -// Atomically exchange a pointer value. -#define mi_atomic_exchange_ptr(T,p,exchange) \ - (T*)(mi_atomic_exchange_voidp((_Atomic(void*)*)(p), exchange, *(p))) - - - -#if !defined(__cplusplus) && defined(_MSC_VER) +// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. 
#define WIN32_LEAN_AND_MEAN #include #include @@ -162,16 +109,29 @@ typedef LONG64 msc_intptr_t; typedef LONG msc_intptr_t; #define MI_64(f) f #endif -static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) { + +typedef enum mi_memory_order_e { + mi_memory_order_relaxed, + mi_memory_order_consume, + mi_memory_order_acquire, + mi_memory_order_release, + mi_memory_order_acq_rel, + mi_memory_order_seq_cst +} mi_memory_order; + +static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)* p, uintptr_t add, mi_memory_order mo) { return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } -static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) { + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)); +} +static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { +static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected)); if (read == *expected) { return true; @@ -181,28 +141,36 @@ static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expect return false; } } -static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { - return mi_atomic_cas_strong(p,expected,desired); +static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { + return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2); } -static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired) { - return mi_atomic_cas_strong(p, expected, desired); -} -static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) { +static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)* p, uintptr_t exchange, mi_memory_order mo) { return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } -static inline uintptr_t mi_atomic_read(_Atomic(uintptr_t) const* p) { - return *p; +static inline mi_atomic_thread_fence(mi_memory_order mo) { + _Atomic(uintptr_t)x = 0; + mi_atomic_exchange_explicit(&x, 1, mo); } -static inline uintptr_t mi_atomic_read_relaxed(_Atomic(uintptr_t) const* p) { +static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { + #if defined(_M_IX86) || defined(_M_X64) return *p; + #else + uintptr_t x = *p; + if (mo > mi_memory_order_relaxed) { + while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; + } + return x; + #endif } -static inline 
void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) { +static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { #if defined(_M_IX86) || defined(_M_X64) *p = x; #else - mi_atomic_exchange(p,x); + mi_atomic_exchange_explicit(p,x,mo); #endif } + +// These are used by the statistics static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) { #ifdef _WIN64 return (int64_t)mi_atomic_addi((int64_t*)p,add); @@ -216,7 +184,6 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int return current; #endif } - static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { @@ -224,63 +191,31 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } -#else -#ifdef __cplusplus -#define MI_USING_STD using namespace std; -#else -#define MI_USING_STD -#endif -static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) { - MI_USING_STD - return atomic_fetch_add_explicit(p, add, memory_order_acq_rel); -} -static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) { - MI_USING_STD - return atomic_fetch_and_explicit(p, x, memory_order_acq_rel); -} -static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) { - MI_USING_STD - return atomic_fetch_or_explicit(p, x, memory_order_acq_rel); -} -static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { - MI_USING_STD - return atomic_compare_exchange_weak_explicit(p, expected, desired, memory_order_release, memory_order_relaxed); -} -static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired) { - MI_USING_STD - return atomic_compare_exchange_weak_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire); -} -static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { - MI_USING_STD - return atomic_compare_exchange_strong_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire); -} -static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) { - MI_USING_STD - return atomic_exchange_explicit(p, exchange, memory_order_acq_rel); -} -static inline uintptr_t mi_atomic_read_relaxed(const _Atomic(uintptr_t)* p) { - MI_USING_STD - return atomic_load_explicit((_Atomic(uintptr_t)*) p, memory_order_relaxed); -} -static inline uintptr_t mi_atomic_read(const _Atomic(uintptr_t)* p) { - MI_USING_STD - return atomic_load_explicit((_Atomic(uintptr_t)*) p, memory_order_acquire); -} -static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) { - MI_USING_STD - return atomic_store_explicit(p, x, memory_order_release); -} -static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { - MI_USING_STD - return atomic_fetch_add_explicit((_Atomic(int64_t)*)p, add, memory_order_relaxed); -} -static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { - MI_USING_STD - int64_t current = atomic_load_explicit((_Atomic(int64_t)*)p, memory_order_relaxed); - while (current < x && !atomic_compare_exchange_weak_explicit((_Atomic(int64_t)*)p, ¤t, x, memory_order_release, memory_order_relaxed)) { /* nothing */ }; -} +// The pointer macros cast to `uintptr_t`. 
+#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) + #endif + +// Atomically add a signed value; returns the previous value. +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) { + return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add); +} + +// Atomically subtract a signed value; returns the previous value. +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { + return (intptr_t)mi_atomic_addi(p, -sub); +} + +// Yield #if defined(__cplusplus) #include static inline void mi_atomic_yield(void) { diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2dc7e36a..1afdae9c 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -448,21 +448,21 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { - return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); + return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3); } static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { - return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); + return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3); } // Heap access static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { - return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); + return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap)); } static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); - mi_atomic_write(&page->xheap,(uintptr_t)heap); + mi_atomic_store_release(&page->xheap,(uintptr_t)heap); } // Thread free flag helpers diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 17b33bc6..18c415eb 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -259,9 +259,9 @@ typedef struct mi_segment_s { bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields - struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` - struct mi_segment_s* prev; _Atomic(struct mi_segment_s*) abandoned_next; + struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init` + struct mi_segment_s* prev; size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) diff --git a/src/alloc.c b/src/alloc.c index e1c54bed..ebf90ebc 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -307,7 +307,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // Try to put the block on either the page-local thread free list, or the heap delayed free list. mi_thread_free_t tfreex; bool use_delayed; - mi_thread_free_t tfree = mi_atomic_read_relaxed(&page->xthread_free); + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if (mi_unlikely(use_delayed)) { @@ -319,27 +319,27 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_read(&page->xheap)); //mi_page_heap(page); + mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) - mi_block_t* dfree = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { mi_block_set_nextx(heap,block,dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } // and reset the MI_DELAYED_FREEING flag - tfree = mi_atomic_read_relaxed(&page->xthread_free); + tfree = mi_atomic_load_relaxed(&page->xthread_free); do { tfreex = tfree; mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); } } diff --git a/src/arena.c b/src/arena.c index 1c1fc1a0..73a7e704 100644 --- a/src/arena.c +++ b/src/arena.c @@ -105,12 +105,12 @@ static size_t mi_block_count_of_size(size_t size) { static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; - size_t idx = mi_atomic_read(&arena->search_idx); // start from last search + size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around // try to atomically claim a range of bits if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { - mi_atomic_write(&arena->search_idx, idx); // start search from here next time + mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time return true; } } @@ -175,7 +175,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine 
allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages @@ -187,7 +187,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, } // try from another numa node instead.. for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages @@ -228,7 +228,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s size_t bitmap_idx; mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); @@ -254,15 +254,15 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); - mi_assert_internal((uintptr_t)mi_atomic_read_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); + mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - uintptr_t i = mi_atomic_increment(&mi_arena_count); + uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { - mi_atomic_decrement(&mi_arena_count); + mi_atomic_decrement_acq_rel(&mi_arena_count); return false; } - mi_atomic_write_ptr(mi_arena_t,&mi_arenas[i], arena); + mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); return true; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index b9953a4f..2d6df46e 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -121,9 +121,9 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); - uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); + uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]); if ((field & mask) == 0) { // free? - if (mi_atomic_cas_strong(&bitmap[idx], &field, (field|mask))) { + if (mi_atomic_cas_strong_acq_rel(&bitmap[idx], &field, (field|mask))) { // claimed! return true; } @@ -138,7 +138,7 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx { mi_assert_internal(bitmap_idx != NULL); _Atomic(uintptr_t)* field = &bitmap[idx]; - uintptr_t map = mi_atomic_read(field); + uintptr_t map = mi_atomic_load_relaxed(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut // search for 0-bit sequence of length count @@ -158,7 +158,7 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx mi_assert_internal((m >> bitidx) == mask); // no overflow? 
const uintptr_t newmap = map | m; mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(field, &map, newmap)) { // TODO: use strong cas here? + if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? // no success, another thread claimed concurrently.. keep going (with updated `map`) continue; } @@ -204,7 +204,7 @@ static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, s const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); - uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); + uintptr_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); } @@ -217,7 +217,7 @@ static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, siz const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); - uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + uintptr_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } @@ -228,7 +228,7 @@ static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_field const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); + uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]); if (any_ones != NULL) *any_ones = ((field & mask) != 0); return ((field & mask) == mask); } diff --git a/src/heap.c b/src/heap.c index 5d0d4b8a..526c93ed 100644 --- a/src/heap.c +++ b/src/heap.c @@ -143,7 +143,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); + mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); // collect segment caches if (collect >= MI_FORCE) { diff --git a/src/options.c b/src/options.c index 78c01456..85cbf7f6 100644 --- a/src/options.c +++ b/src/options.c @@ -173,11 +173,11 @@ static _Atomic(uintptr_t) out_len; static void mi_out_buf(const char* msg, void* arg) { UNUSED(arg); if (msg==NULL) return; - if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; + if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); if (n==0) return; // claim space - uintptr_t start = mi_atomic_add(&out_len, n); + uintptr_t start = mi_atomic_add_acq_rel(&out_len, n); if (start >= MI_MAX_DELAY_OUTPUT) return; // check bound if (start+n >= MI_MAX_DELAY_OUTPUT) { @@ -189,7 +189,7 @@ static void mi_out_buf(const char* msg, void* arg) { static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) - size_t count = mi_atomic_add(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); + size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? 
MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; @@ -220,14 +220,14 @@ static mi_output_fun* volatile mi_out_default; // = NULL static _Atomic(void*) mi_out_arg; // = NULL static mi_output_fun* mi_out_get_default(void** parg) { - if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); } + if (parg != NULL) { *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - mi_atomic_write_ptr(void,&mi_out_arg, arg); + mi_atomic_store_ptr_release(void,&mi_out_arg, arg); if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } @@ -313,13 +313,13 @@ void _mi_verbose_message(const char* fmt, ...) { static void mi_show_error_message(const char* fmt, va_list args) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > mi_max_error_count) return; + if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > mi_max_error_count) return; + if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args); @@ -365,7 +365,7 @@ static void mi_error_default(int err) { void mi_register_error(mi_error_fun* fun, void* arg) { mi_error_handler = fun; // can be NULL - mi_atomic_write_ptr(void,&mi_error_arg, arg); + mi_atomic_store_ptr_release(void,&mi_error_arg, arg); } void _mi_error_message(int err, const char* fmt, ...) { @@ -376,7 +376,7 @@ void _mi_error_message(int err, const char* fmt, ...) { va_end(args); // and call the error handler which may abort (or return normally) if (mi_error_handler != NULL) { - mi_error_handler(err, mi_atomic_read_ptr(void,&mi_error_arg)); + mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg)); } else { mi_error_default(err); diff --git a/src/os.c b/src/os.c index 0b959a9c..8d0c8237 100644 --- a/src/os.c +++ b/src/os.c @@ -270,11 +270,11 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { - uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_strong(&large_page_try_ok, &try_ok, try_ok - 1); + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); } else { // large OS pages must always reserve and commit. @@ -283,7 +283,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). 
if (p == NULL) { - mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + mi_atomic_store_release(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } @@ -361,13 +361,13 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static _Atomic(uintptr_t) large_page_try_ok; // = 0; - uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); if (!large_only && try_ok > 0) { // If the OS is not configured for large OS pages, or the user does not have // enough permission, the `mmap` will always fail (but it might also fail for other reasons). // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times // to avoid too many failing calls to mmap. - mi_atomic_cas_strong(&large_page_try_ok, &try_ok, try_ok - 1); + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); } else { int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux @@ -407,7 +407,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #endif if (large_only) return p; if (p == NULL) { - mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations + mi_atomic_store_release(&large_page_try_ok, 10); // on error, don't try again for the next N allocations } } } @@ -455,7 +455,7 @@ static mi_decl_cache_align _Atomic(uintptr_t) aligned_base; static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; if ((size%MI_SEGMENT_SIZE) != 0) return NULL; - uintptr_t hint = mi_atomic_add(&aligned_base, size); + uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) uintptr_t init = ((uintptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode @@ -463,8 +463,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif uintptr_t expected = hint + size; - mi_atomic_cas_strong(&aligned_base, &expected, init); - hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all + mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); + hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; return (void*)hint; @@ -760,10 +760,10 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) #else #if defined(MADV_FREE) static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(MADV_FREE); - int err = madvise(start, csize, (int)mi_atomic_read_relaxed(&advice)); + int err = madvise(start, csize, (int)mi_atomic_load_relaxed(&advice)); if (err != 0 && errno == EINVAL && advice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on - mi_atomic_write(&advice, MADV_DONTNEED); + mi_atomic_store_release(&advice, MADV_DONTNEED); err = madvise(start, csize, MADV_DONTNEED); } #elif defined(__wasi__) @@ -970,7 +970,7 @@ static uint8_t* 
mi_os_claim_huge_pages(size_t pages, size_t* total_size) { uintptr_t start = 0; uintptr_t end = 0; - uintptr_t huge_start = mi_atomic_read_relaxed(&mi_huge_start); + uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start); do { start = huge_start; if (start == 0) { @@ -983,7 +983,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); - } while (!mi_atomic_cas_strong(&mi_huge_start, &huge_start, end)); + } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end)); if (total_size != NULL) *total_size = size; return (uint8_t*)start; diff --git a/src/page-queue.c b/src/page-queue.c index ea213019..37719e02 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { heap->page_count--; page->next = NULL; page->prev = NULL; - // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } @@ -274,7 +274,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -341,7 +341,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue for (mi_page_t* page = append->first; page != NULL; page = page->next) { // inline `mi_page_set_heap` to avoid wrong assertion during absorption; // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. - mi_atomic_write(&page->xheap, (uintptr_t)heap); + mi_atomic_store_release(&page->xheap, (uintptr_t)heap); // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a // side effect that it spins until any DELAYED_FREEING is finished. This ensures // that after appending only the new heap will be used for delayed free operations. 
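The thread-delayed-free changes in alloc.c above and page.c below both use the same idiom with the renamed macros: load the list head relaxed, link the block, and publish it with a release CAS that reloads the head on failure. A minimal sketch of that idiom under the new naming, using hypothetical `node_t`/`list_head` names that are not part of this patch (assumes `mimalloc-atomic.h` is included):

// Sketch only: lock-free push written with the explicit memory-order macros.
typedef struct node_s {
  struct node_s* next;
} node_t;

static _Atomic(node_t*) list_head;   // hypothetical list head

static void list_push(node_t* n) {
  // a relaxed load suffices for the first read; publication happens via the release CAS
  node_t* head = mi_atomic_load_ptr_relaxed(node_t, &list_head);
  do {
    n->next = head;  // re-linked each iteration since a failed CAS updates `head`
  } while (!mi_atomic_cas_ptr_weak_release(node_t, &list_head, &head, n));
}

The weak CAS may fail spuriously but is cheaper inside a retry loop, and release ordering on success is enough for a consumer that reads the head with acquire to see the newly linked node.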
diff --git a/src/page.c b/src/page.c index 92faf9f2..cd96bb90 100644 --- a/src/page.c +++ b/src/page.c @@ -126,7 +126,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid mi_delayed_t old_delay; mi_thread_free_t tfree; do { - tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; + tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { @@ -140,7 +140,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid break; // leave never-delayed flag set } } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); + !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); } /* ----------------------------------------------------------- @@ -155,7 +155,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page) { mi_block_t* head; mi_thread_free_t tfreex; - mi_thread_free_t tfree = mi_atomic_read_relaxed(&page->xthread_free); + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); @@ -273,8 +273,8 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { ----------------------------------------------------------- */ void _mi_heap_delayed_free(mi_heap_t* heap) { // take over the list (note: no atomic exchange since it is often NULL) - mi_block_t* block = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); - while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; + mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; // and free them all while(block != NULL) { @@ -283,10 +283,10 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting. 
- mi_block_t* dfree = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { mi_block_set_nextx(heap, block, dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } block = next; } @@ -736,14 +736,14 @@ void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(void,&deferred_arg)); + deferred_free(force, heap->tld->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg)); heap->tld->recurse = false; } } void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; - mi_atomic_write_ptr(void,&deferred_arg, arg); + mi_atomic_store_ptr_release(void,&deferred_arg, arg); } diff --git a/src/random.c b/src/random.c index be95fc46..836f83a2 100644 --- a/src/random.c +++ b/src/random.c @@ -210,11 +210,11 @@ static bool os_random_buf(void* buf, size_t buf_len) { #define GRND_NONBLOCK (1) #endif static _Atomic(uintptr_t) no_getrandom; // = 0 - if (mi_atomic_read(&no_getrandom)==0) { + if (mi_atomic_load_acquire(&no_getrandom)==0) { ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); if (ret >= 0) return (buf_len == (size_t)ret); if (ret != ENOSYS) return false; - mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom + mi_atomic_store_release(&no_getrandom,1); // don't call again, and fall back to /dev/urandom } #endif int flags = O_RDONLY; diff --git a/src/region.c b/src/region.c index d2904687..e916e452 100644 --- a/src/region.c +++ b/src/region.c @@ -123,9 +123,9 @@ static size_t mi_good_commit_size(size_t size) { // Return if a pointer points into a region reserved by us. bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; - size_t count = mi_atomic_read_relaxed(®ions_count); + size_t count = mi_atomic_load_relaxed(®ions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = mi_atomic_read_ptr_relaxed(uint8_t,®ions[i].start); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; @@ -133,7 +133,7 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = mi_atomic_read_ptr(uint8_t,®ion->start); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); mi_assert_internal(start != NULL); return (start + (bit_idx * MI_SEGMENT_SIZE)); } @@ -171,7 +171,7 @@ static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // not out of regions yet? 
- if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; + if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; // try to allocate a fresh region from the OS bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); @@ -184,9 +184,9 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mi_assert_internal(!region_large || region_commit); // claim a fresh slot - const uintptr_t idx = mi_atomic_increment(®ions_count); + const uintptr_t idx = mi_atomic_increment_acq_rel(®ions_count); if (idx >= MI_REGION_MAX) { - mi_atomic_decrement(®ions_count); + mi_atomic_decrement_acq_rel(®ions_count); _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB)); return false; @@ -195,13 +195,13 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // allocated, initialize and claim the initial blocks mem_region_t* r = ®ions[idx]; r->arena_memid = arena_memid; - mi_atomic_write(&r->in_use, 0); - mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_write(&r->reset, 0); + mi_atomic_store_release(&r->in_use, 0); + mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); + mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); + mi_atomic_store_release(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_write_ptr(uint8_t*,&r->start, start); + mi_atomic_store_ptr_release(uint8_t*,&r->start, start); // and share it mi_region_info_t info; @@ -209,7 +209,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, info.x.valid = true; info.x.is_large = region_large; info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_write(&r->info, info.value); // now make it available to others + mi_atomic_store_release(&r->info, info.value); // now make it available to others *region = r; return true; } @@ -221,7 +221,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? mi_region_info_t info; - info.value = mi_atomic_read_relaxed(®ion->info); + info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); if (info.value==0) return false; // numa correct @@ -240,7 +240,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // try all regions for a free slot - const size_t count = mi_atomic_read(®ions_count); + const size_t count = mi_atomic_load_acquire(®ions_count); size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around @@ -280,8 +280,8 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); mi_region_info_t info; - info.value = mi_atomic_read(®ion->info); - uint8_t* start = mi_atomic_read_ptr(uint8_t,®ion->start); + info.value = mi_atomic_load_acquire(®ion->info); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); mi_assert_internal(!(info.x.is_large && !*is_large)); mi_assert_internal(start != NULL); @@ -400,7 +400,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); mi_region_info_t info; - info.value = mi_atomic_read(®ion->info); + info.value = mi_atomic_load_acquire(®ion->info); mi_assert_internal(info.value != 0); void* blocks_start = mi_region_blocks_start(region, bit_idx); mi_assert_internal(blocks_start == p); // not a pointer in our area? @@ -442,21 +442,21 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re -----------------------------------------------------------------------------*/ void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. - uintptr_t rcount = mi_atomic_read_relaxed(®ions_count); + uintptr_t rcount = mi_atomic_load_relaxed(®ions_count); for (size_t i = 0; i < rcount; i++) { mem_region_t* region = ®ions[i]; - if (mi_atomic_read_relaxed(®ion->info) != 0) { + if (mi_atomic_load_relaxed(®ion->info) != 0) { // if no segments used, try to claim the whole region - uintptr_t m = mi_atomic_read_relaxed(®ion->in_use); - while (m == 0 && !mi_atomic_cas_weak(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; + uintptr_t m = mi_atomic_load_relaxed(®ion->in_use); + while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; if (m == 0) { // on success, free the whole region - uint8_t* start = mi_atomic_read_ptr(uint8_t,®ions[i].start); - size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid); - uintptr_t commit = mi_atomic_read_relaxed(®ions[i].commit); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); + size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); + uintptr_t commit = mi_atomic_load_relaxed(®ions[i].commit); memset(®ions[i], 0, sizeof(mem_region_t)); // and release the whole region - mi_atomic_write(®ion->info, 0); + mi_atomic_store_release(®ion->info, 0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); diff --git a/src/segment.c b/src/segment.c index b5fd13d3..2416dadd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -628,17 +628,16 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ return NULL; } } - atomic_thread_fence(memory_order_acq_rel); segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment->mem_is_fixed ? 
segment->mem_is_committed : true); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan if (!pages_still_good) { // zero the segment info (but not the `mem` fields) - atomic_thread_fence(memory_order_release); // with read of `abandoned_next` in `mi_abandoned_pop` ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -792,7 +791,6 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a uint16_t reserved = page->reserved; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); - atomic_thread_fence(memory_order_release); page->capacity = capacity; page->reserved = reserved; page->xblock_size = block_size; @@ -892,69 +890,69 @@ static mi_decl_cache_align _Atomic(uintptr_t) abandoned_readers; // = // Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); - mi_segment_t* anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited); + mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited); do { - mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, anext); - } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, &anext, segment)); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext); + } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment)); } // Move the visited list to the abandoned list. 
 // Move the visited list to the abandoned list.
 static bool mi_abandoned_visited_revisit(void) {
   // quick check if the visited list is empty
-  if (mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false;
+  if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false;
   // grab the whole visited list
-  mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL);
+  mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL);
   if (first == NULL) return false;
   // first try to swap directly if the abandoned list happens to be NULL
   mi_tagged_segment_t afirst;
-  mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
+  mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
   if (mi_tagged_segment_ptr(ts)==NULL) {
     afirst = mi_tagged_segment(first, ts);
-    if (mi_atomic_cas_strong(&abandoned, &ts, afirst)) return true;
+    if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) return true;
   }
   // find the last element of the visited list: O(n)
   mi_segment_t* last = first;
   mi_segment_t* next;
-  while ((next = mi_atomic_read_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) {
+  while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) {
     last = next;
   }
   // and atomically prepend to the abandoned list
   // (no need to increase the readers as we don't access the abandoned segments)
-  mi_tagged_segment_t anext = mi_atomic_read_relaxed(&abandoned);
+  mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned);
   do {
-    mi_atomic_write_ptr(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
+    mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
     afirst = mi_tagged_segment(first, anext);
-  } while (!mi_atomic_cas_weak(&abandoned, &anext, afirst));
+  } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst));
   return true;
 }
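`mi_abandoned_visited_revisit` above steals the whole visited list with a single exchange and then splices the chain in front of the abandoned list. Below is a minimal sketch of that splice, reusing the hypothetical `node_t` from the previous sketch and deliberately omitting the A-B-A tag that the real code carries in `mi_tagged_segment_t`:

    #include <stdbool.h>
    #include <stddef.h>   // plus the includes and node_t from the push sketch above

    // Steal every node from `src` and prepend the whole chain to `dst`.
    // Returns false if `src` was empty.
    static bool stack_move_all(_Atomic(node_t*)* src, _Atomic(node_t*)* dst) {
      // grab the entire source list in one atomic step
      node_t* first = atomic_exchange_explicit(src, NULL, memory_order_acq_rel);
      if (first == NULL) return false;
      // find the last node of the stolen chain: O(n)
      node_t* last = first;
      node_t* next;
      while ((next = atomic_load_explicit(&last->next, memory_order_relaxed)) != NULL) {
        last = next;
      }
      // and atomically prepend the chain in front of `dst`
      node_t* head = atomic_load_explicit(dst, memory_order_relaxed);
      do {
        atomic_store_explicit(&last->next, head, memory_order_release);
      } while (!atomic_compare_exchange_weak_explicit(
                  dst, &head, first,
                  memory_order_release, memory_order_relaxed));
      return true;
    }

Because the steal is one exchange, concurrent pushers either land on the stolen chain (and are moved along with it) or on the now-empty source list; no node can be lost in between.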
 // Push on the abandoned list.
 static void mi_abandoned_push(mi_segment_t* segment) {
   mi_assert_internal(segment->thread_id == 0);
-  mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
+  mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
   mi_assert_internal(segment->used > 0);
   mi_tagged_segment_t next;
-  mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
+  mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
   do {
-    mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts));
+    mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts));
     next = mi_tagged_segment(segment, ts);
-  } while (!mi_atomic_cas_weak(&abandoned, &ts, next));
+  } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next));
 }
 // Wait until there are no more pending reads on segments that used to be in the abandoned list
 void _mi_abandoned_await_readers(void) {
   uintptr_t n;
   do {
-    n = mi_atomic_read(&abandoned_readers);
+    n = mi_atomic_load_acquire(&abandoned_readers);
     if (n != 0) mi_atomic_yield();
   } while (n != 0);
 }
@@ -963,7 +961,7 @@ void _mi_abandoned_await_readers(void) {
 static mi_segment_t* mi_abandoned_pop(void) {
   mi_segment_t* segment;
   // Check efficiently if it is empty (or if the visited list needs to be moved)
-  mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
+  mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
   segment = mi_tagged_segment_ptr(ts);
   if (mi_likely(segment == NULL)) {
     if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL
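`mi_abandoned_push` above, and the pop in the next hunk, guard the shared head `abandoned` with a tagged pointer: since segments are `MI_SEGMENT_SIZE` aligned, the low bits of the head word are free to hold a counter that advances on every push, so a segment pointer that is popped and later pushed again no longer compares equal in a CAS. A rough standalone sketch of such a tag, assuming only that the pointed-to objects are at least 64-byte aligned (all names are hypothetical, not mimalloc's):

    #include <assert.h>
    #include <stdint.h>

    // A pointer and a small modification counter packed into one word.
    typedef uintptr_t tagged_ptr_t;

    #define TAG_BITS  6                                  // 64-byte alignment frees the low 6 bits
    #define TAG_MASK  (((uintptr_t)1 << TAG_BITS) - 1)

    static void* tagged_get_ptr(tagged_ptr_t tp) {
      return (void*)(tp & ~TAG_MASK);
    }

    // Build a new tagged value pointing at `p`, advancing the counter of `prev`,
    // so that an A-B-A reappearance of `p` at the list head still fails a CAS.
    static tagged_ptr_t tagged_make(void* p, tagged_ptr_t prev) {
      assert(((uintptr_t)p & TAG_MASK) == 0);   // alignment provides the tag bits
      const uintptr_t tag = (prev + 1) & TAG_MASK;
      return (uintptr_t)p | tag;
    }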
@@ -975,19 +973,19 @@ static mi_segment_t* mi_abandoned_pop(void) {
   // a segment to be decommitted while a read is still pending,
   // and a tagged pointer to prevent A-B-A link corruption.
   // (this is called from `region.c:_mi_mem_free` for example)
-  mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
+  mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted
   mi_tagged_segment_t next = 0;
-  ts = mi_atomic_read(&abandoned);
+  ts = mi_atomic_load_acquire(&abandoned);
   do {
     segment = mi_tagged_segment_ptr(ts);
     if (segment != NULL) {
-      mi_segment_t* anext = mi_atomic_read_ptr(mi_segment_t, &segment->abandoned_next);
+      mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next);
       next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
     }
   } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next));
-  mi_atomic_decrement(&abandoned_readers); // release reader lock
+  mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock
   if (segment != NULL) {
-    mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, NULL);
+    mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
   }
   return segment;
 }
@@ -999,7 +997,7 @@ static mi_segment_t* mi_abandoned_pop(void) {
 static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->used == segment->abandoned);
   mi_assert_internal(segment->used > 0);
-  mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
+  mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
   mi_assert_expensive(mi_segment_is_valid(segment, tld));
   // remove the segment from the free page queue if needed
@@ -1013,7 +1011,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_segments_track_size(-((long)segment->segment_size), tld);
   segment->thread_id = 0;
   segment->abandoned_visits = 0;
-  mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, NULL);
+  mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
   mi_abandoned_push(segment);
 }
@@ -1077,7 +1075,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool
 // Reclaim a segment; returns NULL if the segment was freed
 // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
 static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
-  mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
+  mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
   if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
   segment->thread_id = _mi_thread_id();
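`mi_abandoned_pop` above additionally brackets its traversal with `abandoned_readers`: a popping thread announces itself before dereferencing `abandoned_next`, and `_mi_abandoned_await_readers` (called for example from `region.c:_mi_mem_free`) spins until no such read is in flight before the memory is released. The following simplified sketch mirrors that protocol with the same memory orders as the patch, again reusing the hypothetical `node_t` and omitting the tag:

    #include <stdint.h>   // plus the includes and node_t from the push sketch above

    static _Atomic(uintptr_t) readers;   // number of pops currently dereferencing list nodes

    static node_t* guarded_pop(_Atomic(node_t*)* top) {
      atomic_fetch_add_explicit(&readers, 1, memory_order_relaxed);   // announce the read
      node_t* n = atomic_load_explicit(top, memory_order_acquire);
      while (n != NULL) {
        // reading n->next is only safe while readers > 0 keeps the memory mapped
        node_t* next = atomic_load_explicit(&n->next, memory_order_relaxed);
        if (atomic_compare_exchange_weak_explicit(top, &n, next,
              memory_order_acq_rel, memory_order_acquire)) break;
      }
      atomic_fetch_sub_explicit(&readers, 1, memory_order_relaxed);   // done reading
      return n;
    }

    // Called before freeing or decommitting memory that list nodes may live in.
    static void await_readers(void) {
      while (atomic_load_explicit(&readers, memory_order_acquire) != 0) {
        // spin; a real implementation would yield the processor here
      }
    }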
@@ -1294,13 +1292,13 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
   // huge page segments are always abandoned and can be freed immediately by any thread
   mi_assert_internal(segment->page_kind==MI_PAGE_HUGE);
   mi_assert_internal(segment == _mi_page_segment(page));
-  mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
+  mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id)==0);
   // claim it and free
   mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized.
   // paranoia: if this is the last reference, the cas should always succeed
   uintptr_t expected_tid = 0;
-  if (mi_atomic_cas_strong(&segment->thread_id, &expected_tid, heap->thread_id)) {
+  if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) {
     mi_block_set_next(page, block, page->free);
     page->free = block;
     page->used--;
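The huge-page free above claims the segment with a single strong CAS of `thread_id` from 0 to the current heap's id. A strong CAS (rather than the weak variants used in the retry loops earlier) is the right tool when there is no loop: a spurious failure would be indistinguishable from losing the race. A minimal sketch of that claim pattern, with hypothetical names not taken from mimalloc:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef struct resource_s {
      _Atomic(uintptr_t) owner;   // 0 means unowned
    } resource_t;

    // Claim `r` for `my_id`; returns true if this call became the owner.
    static bool try_claim(resource_t* r, uintptr_t my_id) {
      uintptr_t expected = 0;
      return atomic_compare_exchange_strong_explicit(
               &r->owner, &expected, my_id,
               memory_order_acq_rel,    // success: acquire the resource and publish the claim
               memory_order_acquire);   // failure: observe the current owner
    }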