From e27422adca7285bdcace8a6052860122eaa1bff7 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jul 2020 20:55:45 -0700 Subject: [PATCH 01/11] switch to using C++ atomics in MSVC as well --- include/mimalloc-atomic.h | 10 +++++----- src/bitmap.inc.c | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 722b6ad6..c3d0ad23 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -13,12 +13,12 @@ terms of the MIT license. A copy of the license can be found in the file // We need to be portable between C, C++, and MSVC. // ------------------------------------------------------ -#if defined(_MSC_VER) -#define _Atomic(tp) tp -#define ATOMIC_VAR_INIT(x) x -#elif defined(__cplusplus) +#if defined(__cplusplus) #include #define _Atomic(tp) std::atomic +#elif defined(_MSC_VER) +#define _Atomic(tp) tp +#define ATOMIC_VAR_INIT(x) x #else #include #endif @@ -126,7 +126,7 @@ static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t su (T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange)) -#ifdef _MSC_VER +#if !defined(__cplusplus) && defined(_MSC_VER) #define WIN32_LEAN_AND_MEAN #include #include diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index c3813a44..99e8fa6f 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -72,6 +72,14 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { #if defined(_MSC_VER) #define MI_HAVE_BITSCAN #include +#ifndef MI_64 +#if MI_INTPTR_SIZE==8 +#define MI_64(f) f##64 +#else +#define MI_64(f) f +#endif +#endif + static inline size_t mi_bsf(uintptr_t x) { if (x==0) return 8*MI_INTPTR_SIZE; DWORD idx; From 09ade024298000157729c877c2087cfe2d762454 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jul 2020 22:52:27 -0700 Subject: [PATCH 02/11] bring inline with C11 atomics; no volatile and cas order of expected/desired --- include/mimalloc-atomic.h | 166 +++++++++++++++++--------------------- include/mimalloc-types.h | 36 ++++----- src/alloc.c | 15 ++-- src/arena.c | 2 +- src/bitmap.inc.c | 11 ++- src/options.c | 6 +- src/os.c | 19 ++--- src/page.c | 26 +++--- src/random.c | 2 +- src/region.c | 14 ++-- src/segment.c | 37 +++++---- src/stats.c | 24 +++--- 12 files changed, 170 insertions(+), 188 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index c3d0ad23..beb0f12c 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -27,103 +27,99 @@ terms of the MIT license. A copy of the license can be found in the file // Atomic operations specialized for mimalloc // ------------------------------------------------------ -// Atomically add a value; returns the previous value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add); +// Atomically add a value; returns the previous value. Memory ordering is acquire-release. +static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add); -// Atomically "and" a value; returns the previous value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); +// Atomically "and" a value; returns the previous value. Memory ordering is acquire-release. +static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x); -// Atomically "or" a value; returns the previous value. Memory ordering is relaxed. 
-static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); +// Atomically "or" a value; returns the previous value. Memory ordering is acquire-release. +static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x); // Atomically compare and exchange a value; returns `true` if successful. -// May fail spuriously. Memory ordering as release on success, and relaxed on failure. -// (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); +// May fail spuriously. Memory ordering is acquire-release; with acquire on failure. +static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired); // Atomically compare and exchange a value; returns `true` if successful. -// Memory ordering is acquire-release -// (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); +// Memory ordering is acquire-release; with acquire on failure. +static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired); // Atomically exchange a value. Memory ordering is acquire-release. -static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange); +static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange); // Atomically read a value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p); +static inline uintptr_t mi_atomic_read_relaxed(const _Atomic(uintptr_t)* p); // Atomically read a value. Memory ordering is acquire. -static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p); +static inline uintptr_t mi_atomic_read(const _Atomic(uintptr_t)* p); // Atomically write a value. Memory ordering is release. -static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); +static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x); // Yield static inline void mi_atomic_yield(void); -// Atomically add a 64-bit value; returns the previous value. +// Atomically add a 64-bit value; returns the previous value. Memory ordering is relaxed. // Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add); +static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add); // Atomically update `*p` with the maximum of `*p` and `x` as a 64-bit value. // Returns the previous value. Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x); +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x); -// Atomically read a 64-bit value -// Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline int64_t mi_atomic_readi64(volatile int64_t* p); // Atomically subtract a value; returns the previous value. -static inline uintptr_t mi_atomic_sub(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { +static inline uintptr_t mi_atomic_sub(_Atomic(uintptr_t)* p, uintptr_t sub) { return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub))); } // Atomically increment a value; returns the incremented result. 
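/* Illustrative sketch (not part of the patch): the C11-style compare-and-swap
   retry loop that this change standardizes on. `expected` is passed by address
   and, on failure, is overwritten with the value actually observed, so the
   loop never needs to re-read the atomic by hand. Plain <stdatomic.h> calls
   are used here instead of the mi_atomic_* wrappers. */
#include <stdatomic.h>
#include <stdint.h>

static void atomic_set_bits(_Atomic(uintptr_t)* p, uintptr_t bits) {
  uintptr_t expected = atomic_load_explicit(p, memory_order_relaxed);
  uintptr_t desired;
  do {
    desired = expected | bits;          // recompute from the latest observed value
  } while (!atomic_compare_exchange_weak_explicit(
             p, &expected, desired,
             memory_order_acq_rel,      // ordering on success
             memory_order_acquire));    // ordering on failure; `expected` now holds *p
}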
-static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { +static inline uintptr_t mi_atomic_increment(_Atomic(uintptr_t)* p) { return mi_atomic_add(p, 1); } // Atomically decrement a value; returns the decremented result. -static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { +static inline uintptr_t mi_atomic_decrement(_Atomic(uintptr_t)* p) { return mi_atomic_sub(p, 1); } // Atomically add a signed value; returns the previous value. -static inline intptr_t mi_atomic_addi(volatile _Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)mi_atomic_add((volatile _Atomic(uintptr_t)*)p, (uintptr_t)add); +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add) { + return (intptr_t)mi_atomic_add((_Atomic(uintptr_t)*)p, (uintptr_t)add); } // Atomically subtract a signed value; returns the previous value. -static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t sub) { +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub) { return (intptr_t)mi_atomic_addi(p,-sub); } // Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). #define mi_atomic_read_ptr_relaxed(T,p) \ - (T*)(mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)(p))) + (T*)(mi_atomic_read_relaxed((const _Atomic(uintptr_t)*)(p))) // Atomically read a pointer; Memory order is acquire. #define mi_atomic_read_ptr(T,p) \ - (T*)(mi_atomic_read((const volatile _Atomic(uintptr_t)*)(p))) + (T*)(mi_atomic_read((const _Atomic(uintptr_t)*)(p))) // Atomically write a pointer; Memory order is acquire. #define mi_atomic_write_ptr(T,p,x) \ - mi_atomic_write((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x)) + mi_atomic_write((_Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x)) + + +static inline bool mi_atomic_cas_weak_voidp(_Atomic(void*)*p, void** expected, void* desired, void* unused) { + (void)(unused); + return mi_atomic_cas_weak((_Atomic(uintptr_t)*)p, (uintptr_t*)expected, (uintptr_t)desired); +} // Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. // Memory order is release. (like a write) -// (Note: expected and desired are in opposite order from atomic_compare_exchange) -#define mi_atomic_cas_ptr_weak(T,p,desired,expected) \ - mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) +#define mi_atomic_cas_ptr_weak(T,p,expected,desired) \ + mi_atomic_cas_weak_voidp((_Atomic(void*)*)(p), (void**)(expected), desired, *(expected)) -// Atomically compare and exchange a pointer; returns `true` if successful. Memory order is acquire_release. -// (Note: expected and desired are in opposite order from atomic_compare_exchange) -#define mi_atomic_cas_ptr_strong(T,p,desired,expected) \ - mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)(p),(uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) // Atomically exchange a pointer value. 
#define mi_atomic_exchange_ptr(T,p,exchange) \ - (T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange)) + (T*)mi_atomic_exchange((_Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange)) #if !defined(__cplusplus) && defined(_MSC_VER) @@ -137,31 +133,38 @@ typedef LONG64 msc_intptr_t; typedef LONG msc_intptr_t; #define MI_64(f) f #endif -static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { +static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) { return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } -static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) { return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) { return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); +static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { + uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected)); + if (read == *expected) { + return true; + } + else { + *expected = read; + return false; + } } -static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return mi_atomic_cas_strong(p,desired,expected); +static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { + return mi_atomic_cas_strong(p,expected,desired); } -static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { +static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) { return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } -static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { +static inline uintptr_t mi_atomic_read(_Atomic(uintptr_t) const* p) { return *p; } -static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { +static inline uintptr_t mi_atomic_read_relaxed(_Atomic(uintptr_t) const* p) { return *p; } -static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { +static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) { #if defined(_M_IX86) || defined(_M_X64) *p = x; #else @@ -171,9 +174,9 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) static inline void mi_atomic_yield(void) { YieldProcessor(); } -static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) { +static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) { #ifdef _WIN64 - mi_atomic_addi(p,add); + return (int64_t)mi_atomic_addi((int64_t*)p,add); #else int64_t current; int64_t sum; @@ -181,84 +184,67 @@ static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) { current = *p; sum = current + add; } while (_InterlockedCompareExchange64(p, sum, current) != 
current); + return current; #endif } -static inline void mi_atomic_maxi64(volatile _Atomic(int64_t)*p, int64_t x) { +static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { current = *p; } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } -static inline int64_t mi_atomic_readi64(volatile _Atomic(int64_t)*p) { - #ifdef _WIN64 - return *p; - #else - int64_t current; - do { - current = *p; - } while (_InterlockedCompareExchange64(p, current, current) != current); - return current; - #endif -} - #else #ifdef __cplusplus #define MI_USING_STD using namespace std; #else #define MI_USING_STD #endif -static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { +static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) { MI_USING_STD - return atomic_fetch_add_explicit(p, add, memory_order_relaxed); + return atomic_fetch_add_explicit(p, add, memory_order_acq_rel); } -static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD return atomic_fetch_and_explicit(p, x, memory_order_acq_rel); } -static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD return atomic_fetch_or_explicit(p, x, memory_order_acq_rel); } -static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { +static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { MI_USING_STD - return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_acquire); + return atomic_compare_exchange_weak_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire); } -static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { +static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { MI_USING_STD - return atomic_compare_exchange_strong_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_acquire); + return atomic_compare_exchange_strong_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire); } -static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { +static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) { MI_USING_STD return atomic_exchange_explicit(p, exchange, memory_order_acq_rel); } -static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p) { +static inline uintptr_t mi_atomic_read_relaxed(const _Atomic(uintptr_t)* p) { MI_USING_STD - return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed); + return atomic_load_explicit((_Atomic(uintptr_t)*) p, memory_order_relaxed); } -static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) { +static inline uintptr_t mi_atomic_read(const _Atomic(uintptr_t)* p) { MI_USING_STD - return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire); + return atomic_load_explicit((_Atomic(uintptr_t)*) p, memory_order_acquire); } -static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { +static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD return atomic_store_explicit(p, x, 
memory_order_release); } -static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add) { +static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { MI_USING_STD - atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); + return atomic_fetch_add_explicit((_Atomic(int64_t)*)p, add, memory_order_relaxed); } -static inline int64_t mi_atomic_readi64(volatile int64_t* p) { +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { MI_USING_STD - return atomic_load_explicit((volatile _Atomic(int64_t)*) p, memory_order_relaxed); -} -static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x) { - MI_USING_STD - int64_t current; - do { - current = mi_atomic_readi64(p); - } while (current < x && !atomic_compare_exchange_weak_explicit((volatile _Atomic(int64_t)*)p, ¤t, x, memory_order_acq_rel, memory_order_relaxed)); + int64_t current = atomic_load_explicit((_Atomic(int64_t)*)p, memory_order_relaxed); + while (current < x && !atomic_compare_exchange_weak_explicit((_Atomic(int64_t)*)p, ¤t, x, memory_order_acq_rel, memory_order_acquire)) { /* nothing */ }; } #if defined(__cplusplus) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 449e2e41..5b31f6f3 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -222,8 +222,8 @@ typedef struct mi_page_s { uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads - volatile _Atomic(uintptr_t) xheap; + _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads + _Atomic(uintptr_t) xheap; struct mi_page_s* next; // next page owned by this thread with the same `block_size` struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` @@ -243,28 +243,28 @@ typedef enum mi_page_kind_e { // contain blocks. typedef struct mi_segment_s { // memory fields - size_t memid; // id for the os-level memory manager - bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) - bool mem_is_committed; // `true` if the whole segment is eagerly committed + size_t memid; // id for the os-level memory manager + bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields - struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` + struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; struct mi_segment_s* abandoned_next; - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) - size_t used; // count of pages in use (`used <= capacity`) - size_t capacity; // count of available pages (`#free + used`) - size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` - size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` + size_t used; // count of pages in use (`used <= capacity`) + size_t capacity; // count of available pages (`#free + used`) + size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` + size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages. + uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` // layout like this to optimize access in `mi_free` - size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). - volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment - mi_page_kind_t page_kind; // kind of pages: small, large, or huge - mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages + size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). + _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment + mi_page_kind_t page_kind; // kind of pages: small, large, or huge + mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; @@ -322,7 +322,7 @@ struct mi_heap_s { mi_tld_t* tld; mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") - volatile _Atomic(mi_block_t*) thread_delayed_free; + _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list diff --git a/src/alloc.c b/src/alloc.c index 57034522..62c3c018 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -305,11 +305,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc } // Try to put the block on either the page-local thread free list, or the heap delayed free list. 
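/* Illustrative sketch (not part of the patch): how a block pointer and a 2-bit
   delayed-free state can share one atomic word, in the spirit of the
   mi_thread_free_t value (`tfree`) manipulated below via mi_tf_block and
   mi_tf_delayed. The names here are invented for illustration; blocks are
   assumed to be at least 4-byte aligned so the low two bits are free. */
#include <stdint.h>

typedef uintptr_t thread_free_t;

static inline void*    tf_block(thread_free_t tf)   { return (void*)(tf & ~(uintptr_t)0x3); }
static inline unsigned tf_delayed(thread_free_t tf) { return (unsigned)(tf & 0x3); }
static inline thread_free_t tf_make(void* block, unsigned delayed) {
  return ((uintptr_t)block & ~(uintptr_t)0x3) | (delayed & 0x3);
}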
- mi_thread_free_t tfree; mi_thread_free_t tfreex; bool use_delayed; + mi_thread_free_t tfree = mi_atomic_read_relaxed(&page->xthread_free); do { - tfree = mi_atomic_read_relaxed(&page->xthread_free); use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if (mi_unlikely(use_delayed)) { // unlikely: this only happens on the first concurrent free in a page that is in the full list @@ -320,7 +319,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) @@ -328,19 +327,19 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) - mi_block_t* dfree; + mi_block_t* dfree = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { - dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); mi_block_set_nextx(heap,block,dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } // and reset the MI_DELAYED_FREEING flag + tfree = mi_atomic_read_relaxed(&page->xthread_free); do { - tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free); + tfreex = tfree; mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); } } diff --git a/src/arena.c b/src/arena.c index bb9fc174..1c1fc1a0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -63,7 +63,7 @@ typedef struct mi_arena_s { bool is_zero_init; // is the arena zero initialized? bool is_committed; // is the memory committed bool is_large; // large OS page allocated - volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 99e8fa6f..b9953a4f 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -30,7 +30,7 @@ and that the sequence must be smaller or equal to the bits in a field. #define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set // An atomic bitmap of `uintptr_t` fields -typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t; +typedef _Atomic(uintptr_t) mi_bitmap_field_t; typedef mi_bitmap_field_t* mi_bitmap_t; // A bitmap index is the index of the bit in a bitmap. @@ -123,7 +123,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); if ((field & mask) == 0) { // free? 
- if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { + if (mi_atomic_cas_strong(&bitmap[idx], &field, (field|mask))) { // claimed! return true; } @@ -137,7 +137,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); - volatile _Atomic(uintptr_t)* field = &bitmap[idx]; + _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_read(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut @@ -158,9 +158,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx mi_assert_internal((m >> bitidx) == mask); // no overflow? const uintptr_t newmap = map | m; mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going - map = mi_atomic_read(field); + if (!mi_atomic_cas_weak(field, &map, newmap)) { // TODO: use strong cas here? + // no success, another thread claimed concurrently.. keep going (with updated `map`) continue; } else { diff --git a/src/options.c b/src/options.c index f29b387c..78c01456 100644 --- a/src/options.c +++ b/src/options.c @@ -217,7 +217,7 @@ static void mi_out_buf_stderr(const char* msg, void* arg) { // For now, don't register output from multiple threads. #pragma warning(suppress:4180) static mi_output_fun* volatile mi_out_default; // = NULL -static volatile _Atomic(void*) mi_out_arg; // = NULL +static _Atomic(void*) mi_out_arg; // = NULL static mi_output_fun* mi_out_get_default(void** parg) { if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); } @@ -241,7 +241,7 @@ static void mi_add_stderr_output() { // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. // -------------------------------------------------------- -static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings +static _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings // When overriding malloc, we may recurse into mi_vfprintf if an allocation // inside the C runtime causes another message. 
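/* Illustrative sketch (not part of the patch): how the atomic `error_count`
   above can cap the number of emitted messages. The threshold constant and the
   printing code are assumptions for illustration only. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERROR_COUNT 16

static _Atomic(uintptr_t) error_count;  // = 0

static void emit_error(const char* msg) {
  // fetch_add returns the previous count, so only the first
  // MAX_ERROR_COUNT callers actually print; later errors are dropped.
  if (atomic_fetch_add_explicit(&error_count, 1, memory_order_relaxed) < MAX_ERROR_COUNT) {
    fprintf(stderr, "error: %s\n", msg);
  }
}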
@@ -339,7 +339,7 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co // -------------------------------------------------------- static mi_error_fun* volatile mi_error_handler; // = NULL -static volatile _Atomic(void*) mi_error_arg; // = NULL +static _Atomic(void*) mi_error_arg; // = NULL static void mi_error_default(int err) { UNUSED(err); diff --git a/src/os.c b/src/os.c index 8079e5a0..29a76a88 100644 --- a/src/os.c +++ b/src/os.c @@ -266,7 +266,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { mi_assert_internal(!(large_only && !allow_large)); - static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; + static _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { @@ -274,7 +274,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); + mi_atomic_cas_strong(&large_page_try_ok, &try_ok, try_ok - 1); } else { // large OS pages must always reserve and commit. @@ -360,14 +360,14 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro fd = VM_MAKE_TAG(os_tag); #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { - static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; + static _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // If the OS is not configured for large OS pages, or the user does not have // enough permission, the `mmap` will always fail (but it might also fail for other reasons). // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times // to avoid too many failing calls to mmap. - mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); + mi_atomic_cas_strong(&large_page_try_ok, &try_ok, try_ok - 1); } else { int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux @@ -449,7 +449,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro // On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. 
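/* Illustrative sketch (not part of the patch): the "bump hint" pattern used by
   mi_os_get_aligned_hint below. The shared cursor is lazily initialized with a
   strong CAS whose `expected` value is exactly what the first caller left
   behind, so at most one initializer wins; afterwards every caller just
   fetch_adds past it. The 4 TiB start and 64-bit address space are
   illustrative assumptions (the real code is guarded by MI_INTPTR_SIZE >= 8). */
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

static _Atomic(uintptr_t) hint_base;   // = 0 until first use

static uintptr_t next_hint(size_t size) {
  uintptr_t hint = atomic_fetch_add_explicit(&hint_base, size, memory_order_acq_rel);
  if (hint == 0) {                         // first caller: pick a start address
    uintptr_t expected = hint + size;      // the value this thread just stored
    uintptr_t init = (uintptr_t)4 << 40;   // 4 TiB (illustrative)
    atomic_compare_exchange_strong_explicit(&hint_base, &expected, init,
                                            memory_order_acq_rel, memory_order_acquire);
    hint = atomic_fetch_add_explicit(&hint_base, size, memory_order_acq_rel);
  }
  return hint;  // racing callers may still get a low value, which is fine for a hint
}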
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) -static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base; +static mi_decl_cache_align _Atomic(uintptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { @@ -462,7 +462,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif - mi_atomic_cas_strong(&aligned_base, init, hint + size); + uintptr_t expected = hint + size; + mi_atomic_cas_strong(&aligned_base, &expected, init); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; @@ -969,9 +970,9 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { uintptr_t start = 0; uintptr_t end = 0; - uintptr_t expected; + uintptr_t huge_start = mi_atomic_read_relaxed(&mi_huge_start); do { - start = expected = mi_atomic_read_relaxed(&mi_huge_start); + start = huge_start; if (start == 0) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address @@ -982,7 +983,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); - } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); + } while (!mi_atomic_cas_strong(&mi_huge_start, &huge_start, end)); if (total_size != NULL) *total_size = size; return (uint8_t*)start; diff --git a/src/page.c b/src/page.c index c8a4e54b..6b92d4c9 100644 --- a/src/page.c +++ b/src/page.c @@ -122,11 +122,11 @@ bool _mi_page_is_valid(mi_page_t* page) { #endif void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { - mi_thread_free_t tfree; mi_thread_free_t tfreex; mi_delayed_t old_delay; + mi_thread_free_t tfree; do { - tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break this loop and not do a CAS + tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { @@ -140,7 +140,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid break; // leave never-delayed flag set } } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + !mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); } /* ----------------------------------------------------------- @@ -154,13 +154,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid static void _mi_page_thread_free_collect(mi_page_t* page) { mi_block_t* head; - mi_thread_free_t tfree; mi_thread_free_t tfreex; + mi_thread_free_t tfree = mi_atomic_read_relaxed(&page->xthread_free); do { - tfree = mi_atomic_read_relaxed(&page->xthread_free); head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); // return if the list is empty if (head == NULL) return; @@ -273,11 +272,9 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, 
mi_page_queue_t* pq) { (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ void _mi_heap_delayed_free(mi_heap_t* heap) { - // take over the list (note: no atomic exchange is it is often NULL) - mi_block_t* block; - do { - block = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); - } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, NULL, block)); + // take over the list (note: no atomic exchange since it is often NULL) + mi_block_t* block = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; // and free them all while(block != NULL) { @@ -286,11 +283,10 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting. - mi_block_t* dfree; + mi_block_t* dfree = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { - dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); mi_block_set_nextx(heap, block, dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } block = next; } @@ -734,7 +730,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { ----------------------------------------------------------- */ static mi_deferred_free_fun* volatile deferred_free = NULL; -static volatile _Atomic(void*) deferred_arg; // = NULL +static _Atomic(void*) deferred_arg; // = NULL void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; diff --git a/src/random.c b/src/random.c index 2a96ccf6..5c093a91 100644 --- a/src/random.c +++ b/src/random.c @@ -200,7 +200,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { #ifndef GRND_NONBLOCK #define GRND_NONBLOCK (1) #endif - static volatile _Atomic(uintptr_t) no_getrandom; // = 0 + static _Atomic(uintptr_t) no_getrandom; // = 0 if (mi_atomic_read(&no_getrandom)==0) { ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); if (ret >= 0) return (buf_len == (size_t)ret); diff --git a/src/region.c b/src/region.c index ae3a799a..d2904687 100644 --- a/src/region.c +++ b/src/region.c @@ -86,13 +86,13 @@ typedef union mi_region_info_u { // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. 
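/* Illustrative sketch (not part of the patch): claiming an entire bitmap field
   only when it is currently empty, the pattern `_mi_mem_collect` uses further
   down in this diff. If any bit is already set, `m` ends up non-zero (from the
   initial load or from a failed CAS) and the claim is abandoned. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdbool.h>

#define FIELD_FULL (~((uintptr_t)0))

static bool try_claim_whole_field(_Atomic(uintptr_t)* field) {
  uintptr_t m = atomic_load_explicit(field, memory_order_relaxed);
  while (m == 0 && !atomic_compare_exchange_weak_explicit(
                      field, &m, FIELD_FULL,
                      memory_order_acq_rel, memory_order_acquire)) {
    // spurious failure with m still 0: retry; any other failure leaves m != 0
  }
  return (m == 0);  // FIELD_FULL was installed iff the observed value was 0
}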
typedef struct mem_region_s { - volatile _Atomic(uintptr_t) info; // mi_region_info_t.value - volatile _Atomic(void*) start; // start of the memory area + _Atomic(uintptr_t) info; // mi_region_info_t.value + _Atomic(void*) start; // start of the memory area mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block mi_bitmap_field_t reset; // track if reset per block - volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena + _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena uintptr_t padding; // round to 8 fields } mem_region_t; @@ -100,7 +100,7 @@ typedef struct mem_region_s { static mem_region_t regions[MI_REGION_MAX]; // Allocated regions -static volatile _Atomic(uintptr_t) regions_count; // = 0; +static _Atomic(uintptr_t) regions_count; // = 0; /* ---------------------------------------------------------------------------- @@ -447,10 +447,8 @@ void _mi_mem_collect(mi_os_tld_t* tld) { mem_region_t* region = ®ions[i]; if (mi_atomic_read_relaxed(®ion->info) != 0) { // if no segments used, try to claim the whole region - uintptr_t m; - do { - m = mi_atomic_read_relaxed(®ion->in_use); - } while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 )); + uintptr_t m = mi_atomic_read_relaxed(®ion->in_use); + while (m == 0 && !mi_atomic_cas_weak(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; if (m == 0) { // on success, free the whole region uint8_t* start = mi_atomic_read_ptr(uint8_t,®ions[i].start); diff --git a/src/segment.c b/src/segment.c index 8a5ba8c0..58c227bb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -877,15 +877,15 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se // This is a list of visited abandoned pages that were full at the time. // this list migrates to `abandoned` when that becomes NULL. The use of // this list reduces contention and the rate at which segments are visited. -static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL +static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL // The abandoned page list (tagged as it supports pop) -static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL +static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL // We also maintain a count of current readers of the abandoned list // in order to prevent resetting/decommitting segment memory if it might // still be read. -static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0 +static mi_decl_cache_align _Atomic(uintptr_t) abandoned_readers; // = 0 // Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { @@ -893,11 +893,10 @@ static void mi_abandoned_visited_push(mi_segment_t* segment) { mi_assert_internal(segment->abandoned_next == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); - mi_segment_t* anext; + mi_segment_t* anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited); do { - anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited); segment->abandoned_next = anext; - } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext)); + } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, &anext, segment)); } // Move the visited list to the abandoned list. 
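/* Illustrative sketch (not part of the patch): the lock-free list push used by
   mi_abandoned_visited_push above, written with plain C11 atomics on a
   simplified node type. The head observed in `expected` is linked into the new
   node before every CAS attempt; a failed CAS refreshes `expected`, so the
   link is always recomputed from the latest head. */
#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s {
  _Atomic(struct node_s*) next;
  void* payload;
} node_t;

static _Atomic(node_t*) visited_head;  // = NULL

static void visited_push(node_t* n) {
  node_t* expected = atomic_load_explicit(&visited_head, memory_order_relaxed);
  do {
    atomic_store_explicit(&n->next, expected, memory_order_relaxed);
  } while (!atomic_compare_exchange_weak_explicit(&visited_head, &expected, n,
                                                  memory_order_acq_rel,
                                                  memory_order_acquire));
}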
@@ -911,11 +910,11 @@ static bool mi_abandoned_visited_revisit(void) if (first == NULL) return false; // first try to swap directly if the abandoned list happens to be NULL - const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); mi_tagged_segment_t afirst; + mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); if (mi_tagged_segment_ptr(ts)==NULL) { afirst = mi_tagged_segment(first, ts); - if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true; + if (mi_atomic_cas_strong(&abandoned, &ts, afirst)) return true; } // find the last element of the visited list: O(n) @@ -926,12 +925,11 @@ static bool mi_abandoned_visited_revisit(void) // and atomically prepend to the abandoned list // (no need to increase the readers as we don't access the abandoned segments) - mi_tagged_segment_t anext; + mi_tagged_segment_t anext = mi_atomic_read_relaxed(&abandoned); do { - anext = mi_atomic_read_relaxed(&abandoned); last->abandoned_next = mi_tagged_segment_ptr(anext); afirst = mi_tagged_segment(first, anext); - } while (!mi_atomic_cas_weak(&abandoned, afirst, anext)); + } while (!mi_atomic_cas_weak(&abandoned, &anext, afirst)); return true; } @@ -941,13 +939,12 @@ static void mi_abandoned_push(mi_segment_t* segment) { mi_assert_internal(segment->abandoned_next == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); - mi_tagged_segment_t ts; mi_tagged_segment_t next; + mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); do { - ts = mi_atomic_read_relaxed(&abandoned); segment->abandoned_next = mi_tagged_segment_ptr(ts); next = mi_tagged_segment(segment, ts); - } while (!mi_atomic_cas_weak(&abandoned, next, ts)); + } while (!mi_atomic_cas_weak(&abandoned, &ts, next)); } // Wait until there are no more pending reads on segments that used to be in the abandoned list @@ -977,13 +974,13 @@ static mi_segment_t* mi_abandoned_pop(void) { // (this is called from `memory.c:_mi_mem_free` for example) mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; + ts = mi_atomic_read(&abandoned); do { - ts = mi_atomic_read(&abandoned); segment = mi_tagged_segment_ptr(ts); if (segment != NULL) { next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } - } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts)); + } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, &ts, next)); mi_atomic_decrement(&abandoned_readers); // release reader lock if (segment != NULL) { segment->abandoned_next = NULL; @@ -1298,7 +1295,8 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block // claim it and free mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized. 
// paranoia: if this it the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { + uintptr_t expected_tid = 0; + if (mi_atomic_cas_strong(&segment->thread_id, &expected_tid, heap->thread_id)) { mi_block_set_next(page, block, page->free); page->free = block; page->used--; @@ -1315,6 +1313,11 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block mi_segments_track_size((long)segment->segment_size, &tld->segments); _mi_segment_page_free(page, true, &tld->segments); } +#if (MI_DEBUG!=0) + else { + mi_assert_internal(false); + } +#endif } /* ----------------------------------------------------------- diff --git a/src/stats.c b/src/stats.c index 172a3c0a..96f57a47 100644 --- a/src/stats.c +++ b/src/stats.c @@ -26,13 +26,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (mi_is_in_main(stat)) { // add atomically (for abandoned pages) - mi_atomic_addi64(&stat->current,amount); - mi_atomic_maxi64(&stat->peak, mi_atomic_readi64(&stat->current)); + int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_maxi64_relaxed(&stat->peak, current + amount); if (amount > 0) { - mi_atomic_addi64(&stat->allocated,amount); + mi_atomic_addi64_relaxed(&stat->allocated,amount); } else { - mi_atomic_addi64(&stat->freed, -amount); + mi_atomic_addi64_relaxed(&stat->freed, -amount); } } else { @@ -50,8 +50,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { - mi_atomic_addi64( &stat->count, 1 ); - mi_atomic_addi64( &stat->total, (int64_t)amount ); + mi_atomic_addi64_relaxed( &stat->count, 1 ); + mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); } else { stat->count++; @@ -71,17 +71,17 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; if (src->allocated==0 && src->freed==0) return; - mi_atomic_addi64( &stat->allocated, src->allocated * unit); - mi_atomic_addi64( &stat->current, src->current * unit); - mi_atomic_addi64( &stat->freed, src->freed * unit); + mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit); + mi_atomic_addi64_relaxed( &stat->current, src->current * unit); + mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit); // peak scores do not work across threads.. 
- mi_atomic_addi64( &stat->peak, src->peak * unit); + mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit); } static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_addi64( &stat->total, src->total * unit); - mi_atomic_addi64( &stat->count, src->count * unit); + mi_atomic_addi64_relaxed( &stat->total, src->total * unit); + mi_atomic_addi64_relaxed( &stat->count, src->count * unit); } // must be thread safe as it is called from stats_merge From 95afd0509face89d311830b4b13c6db1dec09685 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jul 2020 23:50:22 -0700 Subject: [PATCH 03/11] make segment abandoned_next atomic; tsan passes without warnings now (issue #130) --- CMakeLists.txt | 1 + include/mimalloc-types.h | 16 ++++++++++++++-- src/segment.c | 30 ++++++++++++++++-------------- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 37616eb4..5a228036 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,6 +126,7 @@ endif() if(MI_DEBUG_TSAN MATCHES "ON") if(CMAKE_C_COMPILER_ID MATCHES "Clang") message(STATUS "Build with thread sanitizer (MI_DEBUG_TSAN=ON)") + list(APPEND mi_defines MI_TSAN=1) list(APPEND mi_cflags -fsanitize=thread -g -O1) list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=thread) else() diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 5b31f6f3..17b33bc6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -155,6 +155,7 @@ typedef enum mi_delayed_e { // The `in_full` and `has_aligned` page flags are put in a union to efficiently // test if both are false (`full_aligned == 0`) in the `mi_free` routine. +#if !MI_TSAN typedef union mi_page_flags_s { uint8_t full_aligned; struct { @@ -162,6 +163,16 @@ typedef union mi_page_flags_s { uint8_t has_aligned : 1; } x; } mi_page_flags_t; +#else +// under thread sanitizer, use a byte for each flag to suppress warning, issue #130 +typedef union mi_page_flags_s { + uint16_t full_aligned; + struct { + uint8_t in_full; + uint8_t has_aligned; + } x; +} mi_page_flags_t; +#endif // Thread free list. // We use the bottom 2 bits of the pointer for mi_delayed_t flags @@ -245,12 +256,13 @@ typedef struct mi_segment_s { // memory fields size_t memid; // id for the os-level memory manager bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) - bool mem_is_committed; // `true` if the whole segment is eagerly committed + bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; - struct mi_segment_s* abandoned_next; + _Atomic(struct mi_segment_s*) abandoned_next; + size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) diff --git a/src/segment.c b/src/segment.c index 58c227bb..5af98b1e 100644 --- a/src/segment.c +++ b/src/segment.c @@ -890,12 +890,12 @@ static mi_decl_cache_align _Atomic(uintptr_t) abandoned_readers; // = // Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); mi_segment_t* anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited); do { - segment->abandoned_next = anext; + mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, anext); } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, &anext, segment)); } @@ -903,7 +903,7 @@ static void mi_abandoned_visited_push(mi_segment_t* segment) { static bool mi_abandoned_visited_revisit(void) { // quick check if the visited list is empty - if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false; + if (mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; // grab the whole visited list mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL); @@ -919,15 +919,16 @@ static bool mi_abandoned_visited_revisit(void) // find the last element of the visited list: O(n) mi_segment_t* last = first; - while (last->abandoned_next != NULL) { - last = last->abandoned_next; + mi_segment_t* next; + while ((next = mi_atomic_read_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) { + last = next; } // and atomically prepend to the abandoned list // (no need to increase the readers as we don't access the abandoned segments) mi_tagged_segment_t anext = mi_atomic_read_relaxed(&abandoned); do { - last->abandoned_next = mi_tagged_segment_ptr(anext); + mi_atomic_write_ptr(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext)); afirst = mi_tagged_segment(first, anext); } while (!mi_atomic_cas_weak(&abandoned, &anext, afirst)); return true; @@ -936,13 +937,13 @@ static bool mi_abandoned_visited_revisit(void) // Push on the abandoned list. static void mi_abandoned_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); mi_tagged_segment_t next; mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); do { - segment->abandoned_next = mi_tagged_segment_ptr(ts); + mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); next = mi_tagged_segment(segment, ts); } while (!mi_atomic_cas_weak(&abandoned, &ts, next)); } @@ -971,19 +972,20 @@ static mi_segment_t* mi_abandoned_pop(void) { // Do a pop. We use a reader count to prevent // a segment to be decommitted while a read is still pending, // and a tagged pointer to prevent A-B-A link corruption. 
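/* Illustrative sketch (not part of the patch): a tagged pointer that packs a
   modification count into the alignment bits of a segment pointer, in the
   spirit of mi_tagged_segment_t used by mi_abandoned_pop. The 22 tag bits
   below assume 4 MiB (MI_SEGMENT_SIZE) alignment; the exact encoding in
   mimalloc may differ. Because the tag changes on every update, a CAS cannot
   succeed against a stale head whose pointer value happens to be reused. */
#include <stdint.h>

#define TAG_BITS 22u
#define TAG_MASK ((uintptr_t)((1u << TAG_BITS) - 1))

typedef uintptr_t tagged_ptr_t;

static inline void* tagged_ptr(tagged_ptr_t tp) {
  return (void*)(tp & ~TAG_MASK);
}
static inline tagged_ptr_t tagged_make(void* p, tagged_ptr_t prev) {
  uintptr_t tag = (prev + 1) & TAG_MASK;   // new tag derived from the previous value
  return ((uintptr_t)p & ~TAG_MASK) | tag;
}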
- // (this is called from `memory.c:_mi_mem_free` for example) + // (this is called from `region.c:_mi_mem_free` for example) mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; ts = mi_atomic_read(&abandoned); do { segment = mi_tagged_segment_ptr(ts); if (segment != NULL) { - next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted + mi_segment_t* anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next); + next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, &ts, next)); mi_atomic_decrement(&abandoned_readers); // release reader lock if (segment != NULL) { - segment->abandoned_next = NULL; + mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, NULL); } return segment; } @@ -995,7 +997,7 @@ static mi_segment_t* mi_abandoned_pop(void) { static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); - mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_expensive(mi_segment_is_valid(segment, tld)); // remove the segment from the free page queue if needed @@ -1008,8 +1010,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; - segment->abandoned_next = NULL; segment->abandoned_visits = 0; + mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, NULL); mi_abandoned_push(segment); } @@ -1073,7 +1075,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool // Reclaim a segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { - mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } segment->thread_id = _mi_thread_id(); From ebf951e851de13ecae6e92a6ad1657b61adefac5 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jul 2020 00:15:57 -0700 Subject: [PATCH 04/11] extra checks for atomic ptr exchange; extend mi_atomic_yield for win32 --- include/mimalloc-atomic.h | 108 ++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 40 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index beb0f12c..30d1e4f8 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -93,33 +93,58 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub) { return (intptr_t)mi_atomic_addi(p,-sub); } -// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). -#define mi_atomic_read_ptr_relaxed(T,p) \ - (T*)(mi_atomic_read_relaxed((const _Atomic(uintptr_t)*)(p))) - -// Atomically read a pointer; Memory order is acquire. 
-#define mi_atomic_read_ptr(T,p) \ - (T*)(mi_atomic_read((const _Atomic(uintptr_t)*)(p))) - -// Atomically write a pointer; Memory order is acquire. -#define mi_atomic_write_ptr(T,p,x) \ - mi_atomic_write((_Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x)) - - -static inline bool mi_atomic_cas_weak_voidp(_Atomic(void*)*p, void** expected, void* desired, void* unused) { - (void)(unused); +// Atomically compare and exchange a void pointer; returns `true` if successful. May fail spuriously. +// Memory order is release. (like a write) +static inline bool mi_atomic_cas_weak_voidp(_Atomic(void*)* p, void** expected, void* desired, void* unused1, void* unused2) { + (void)unused1; (void)unused2; // for extra type check return mi_atomic_cas_weak((_Atomic(uintptr_t)*)p, (uintptr_t*)expected, (uintptr_t)desired); } +// Atomically read a void pointer; Memory order is relaxed (i.e. no fence, only atomic). +static inline void* mi_atomic_read_voidp(const _Atomic(void*)* p, void* unused) { + (void)unused; // for extra type check + return (void*)mi_atomic_read((const _Atomic(uintptr_t)*) p); +} + +// Atomically read a void pointer; Memory order is acquire. +static inline void* mi_atomic_read_voidp_relaxed(const _Atomic(void*)*p, void* unused) { + (void)unused; // for extra type check + return (void*)mi_atomic_read_relaxed((const _Atomic(uintptr_t)*) p); +} + +// Atomically write a void pointer; Memory order is acquire. +static inline void mi_atomic_write_voidp(_Atomic(void*)* p, void* exchange, void* unused) { + (void)unused; // for extra type check + mi_atomic_write((_Atomic(uintptr_t)*) p, (uintptr_t)exchange); +} + +// Atomically exchange a void pointer; Memory order is release-acquire. +static inline void* mi_atomic_exchange_voidp(_Atomic(void*)*p, void* exchange, void* unused) { + (void)unused; // for extra type check + return (void*)mi_atomic_exchange((_Atomic(uintptr_t)*) p, (uintptr_t)exchange); +} + // Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. // Memory order is release. (like a write) #define mi_atomic_cas_ptr_weak(T,p,expected,desired) \ - mi_atomic_cas_weak_voidp((_Atomic(void*)*)(p), (void**)(expected), desired, *(expected)) - + mi_atomic_cas_weak_voidp((_Atomic(void*)*)(p), (void**)(expected), desired, *(p), *(expected)) +// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). +#define mi_atomic_read_ptr_relaxed(T,p) \ + (T*)(mi_atomic_read_voidp_relaxed((const _Atomic(void*)*)(p), *(p))) + +// Atomically read a pointer; Memory order is acquire. +#define mi_atomic_read_ptr(T,p) \ + (T*)(mi_atomic_read_voidp((const _Atomic(void*)*)(p), *(p))) + +// Atomically write a pointer; Memory order is acquire. +#define mi_atomic_write_ptr(T,p,x) \ + mi_atomic_write_voidp((_Atomic(void*)*)(p), x, *(p)) + // Atomically exchange a pointer value. 
#define mi_atomic_exchange_ptr(T,p,exchange) \ - (T*)mi_atomic_exchange((_Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange)) + (T*)(mi_atomic_exchange_voidp((_Atomic(void*)*)(p), exchange, *(p))) + #if !defined(__cplusplus) && defined(_MSC_VER) @@ -171,9 +196,6 @@ static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) { mi_atomic_exchange(p,x); #endif } -static inline void mi_atomic_yield(void) { - YieldProcessor(); -} static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) { #ifdef _WIN64 return (int64_t)mi_atomic_addi((int64_t*)p,add); @@ -246,35 +268,41 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { int64_t current = atomic_load_explicit((_Atomic(int64_t)*)p, memory_order_relaxed); while (current < x && !atomic_compare_exchange_weak_explicit((_Atomic(int64_t)*)p, ¤t, x, memory_order_acq_rel, memory_order_acquire)) { /* nothing */ }; } +#endif #if defined(__cplusplus) - #include - static inline void mi_atomic_yield(void) { - std::this_thread::yield(); - } +#include +static inline void mi_atomic_yield(void) { + std::this_thread::yield(); +} +#elif defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include +static inline void mi_atomic_yield(void) { + YieldProcessor(); +} #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)) #if defined(__x86_64__) || defined(__i386__) - static inline void mi_atomic_yield(void) { - asm volatile ("pause" ::: "memory"); - } +static inline void mi_atomic_yield(void) { + asm volatile ("pause" ::: "memory"); +} #elif defined(__arm__) || defined(__aarch64__) - static inline void mi_atomic_yield(void) { - asm volatile("yield"); - } +static inline void mi_atomic_yield(void) { + asm volatile("yield"); +} #endif #elif defined(__wasi__) - #include - static inline void mi_atomic_yield(void) { - sched_yield(); - } +#include +static inline void mi_atomic_yield(void) { + sched_yield(); +} #else - #include - static inline void mi_atomic_yield(void) { - sleep(0); - } +#include +static inline void mi_atomic_yield(void) { + sleep(0); +} #endif -#endif #endif // __MIMALLOC_ATOMIC_H From 28014ee2bc049921effdd9a39fe983a43108cbdc Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jul 2020 00:16:17 -0700 Subject: [PATCH 05/11] fix atomic access for MADV_FREE in os_reset --- src/os.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/os.c b/src/os.c index 29a76a88..0b959a9c 100644 --- a/src/os.c +++ b/src/os.c @@ -759,12 +759,12 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) if (p != start) return false; #else #if defined(MADV_FREE) - static int advice = MADV_FREE; - int err = madvise(start, csize, advice); + static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(MADV_FREE); + int err = madvise(start, csize, (int)mi_atomic_read_relaxed(&advice)); if (err != 0 && errno == EINVAL && advice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on - advice = MADV_DONTNEED; - err = madvise(start, csize, advice); + mi_atomic_write(&advice, MADV_DONTNEED); + err = madvise(start, csize, MADV_DONTNEED); } #elif defined(__wasi__) int err = 0; From 53cbc68de3c65f90787641d2ab2c564a5662244f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jul 2020 00:21:10 -0700 Subject: [PATCH 06/11] display compiler in cmake summary --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
5a228036..98b55ae0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,6 +231,11 @@ endif() message(STATUS "") message(STATUS "Library base name: ${mi_basename}") message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}") +if(MI_USE_CXX MATCHES "ON") + message(STATUS "Compiler : ${CMAKE_CXX_COMPILER}") +else() + message(STATUS "Compiler : ${CMAKE_C_COMPILER}") +endif() message(STATUS "Install directory: ${mi_install_dir}") message(STATUS "Build targets : ${mi_build_targets}") message(STATUS "") From 116159cd40d64fa9e1e50a6c54dd322e2a482659 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jul 2020 11:57:14 -0700 Subject: [PATCH 07/11] use RtlGenRandom on windows to enable compilation as C++ even with dynamic override --- src/random.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/random.c b/src/random.c index 5c093a91..be95fc46 100644 --- a/src/random.c +++ b/src/random.c @@ -162,20 +162,29 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim -----------------------------------------------------------------------------*/ #if defined(_WIN32) +/* +// We prefer BCryptGenRandom over RtlGenRandom but it leads to a crash a when using dynamic override combined with the C++ runtime :-( #pragma comment (lib,"bcrypt.lib") #include static bool os_random_buf(void* buf, size_t buf_len) { return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); } -/* -#define SystemFunction036 NTAPI SystemFunction036 -#include -#undef SystemFunction036 -static bool os_random_buf(void* buf, size_t buf_len) { - RtlGenRandom(buf, (ULONG)buf_len); - return true; -} */ +#define RtlGenRandom SystemFunction036 +#ifdef __cplusplus +extern "C" { +#endif +BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); +#ifdef __cplusplus +} +#endif +static bool os_random_buf(void* buf, size_t buf_len) { + mi_assert_internal(buf_len >= sizeof(uintptr_t)); + memset(buf, 0, buf_len); + RtlGenRandom(buf, (ULONG)buf_len); + return (((uintptr_t*)buf)[0] != 0); // sanity check (but RtlGenRandom should never fail) +} + #elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__sun) || defined(__wasi__) From a9f46dc86f94e5a91eb3315ce2e8b9be6beea55a Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jul 2020 11:58:02 -0700 Subject: [PATCH 08/11] reduce memory order constraints for better efficiency on ARM etc --- include/mimalloc-atomic.h | 27 +++++++++++++++++++-------- src/page.c | 2 +- src/segment.c | 2 +- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 30d1e4f8..b9935cb3 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -27,19 +27,23 @@ terms of the MIT license. A copy of the license can be found in the file // Atomic operations specialized for mimalloc // ------------------------------------------------------ -// Atomically add a value; returns the previous value. Memory ordering is acquire-release. +// Atomically add a value; returns the previous value. Memory ordering is relaxed. static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add); -// Atomically "and" a value; returns the previous value. Memory ordering is acquire-release. +// Atomically "and" a value; returns the previous value. Memory ordering is release. 
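Most of the weakened orderings in this patch follow one shape: an update loop only needs release semantics on the successful CAS (to publish the writes made before it), while the failure case can stay relaxed because the loop simply re-reads and retries. A small self-contained illustration of that shape in plain C11 (the function and names here are illustrative, not part of mimalloc):

#include <stdatomic.h>
#include <stdint.h>

static void set_flag_bits(_Atomic(uintptr_t)* p, uintptr_t bits) {
  uintptr_t expected = atomic_load_explicit(p, memory_order_relaxed);
  uintptr_t desired;
  do {
    desired = expected | bits;
  } while (!atomic_compare_exchange_weak_explicit(p, &expected, desired,
                                                  memory_order_release,    // publish on success
                                                  memory_order_relaxed));  // retry path needs no ordering
}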
static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x); -// Atomically "or" a value; returns the previous value. Memory ordering is acquire-release. +// Atomically "or" a value; returns the previous value. Memory ordering is release. static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x); // Atomically compare and exchange a value; returns `true` if successful. -// May fail spuriously. Memory ordering is acquire-release; with acquire on failure. +// May fail spuriously. Memory ordering is release; with relaxed on failure. static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired); +// Atomically compare and exchange a value; returns `true` if successful. +// May fail spuriously. Memory ordering is acquire-release; with acquire on failure. +static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired); + // Atomically compare and exchange a value; returns `true` if successful. // Memory ordering is acquire-release; with acquire on failure. static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired); @@ -180,6 +184,9 @@ static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expect static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { return mi_atomic_cas_strong(p,expected,desired); } +static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired) { + return mi_atomic_cas_strong(p, expected, desired); +} static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) { return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } @@ -225,17 +232,21 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t #endif static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) { MI_USING_STD - return atomic_fetch_add_explicit(p, add, memory_order_acq_rel); + return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD - return atomic_fetch_and_explicit(p, x, memory_order_acq_rel); + return atomic_fetch_and_explicit(p, x, memory_order_release); } static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD - return atomic_fetch_or_explicit(p, x, memory_order_acq_rel); + return atomic_fetch_or_explicit(p, x, memory_order_release); } static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { + MI_USING_STD + return atomic_compare_exchange_weak_explicit(p, expected, desired, memory_order_release, memory_order_relaxed); +} +static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired) { MI_USING_STD return atomic_compare_exchange_weak_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire); } @@ -266,7 +277,7 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { MI_USING_STD int64_t current = atomic_load_explicit((_Atomic(int64_t)*)p, memory_order_relaxed); - while (current < x && !atomic_compare_exchange_weak_explicit((_Atomic(int64_t)*)p, ¤t, x, memory_order_acq_rel, memory_order_acquire)) { /* nothing */ }; + while (current < x && !atomic_compare_exchange_weak_explicit((_Atomic(int64_t)*)p, 
¤t, x, memory_order_release, memory_order_relaxed)) { /* nothing */ }; } #endif diff --git a/src/page.c b/src/page.c index 6b92d4c9..92faf9f2 100644 --- a/src/page.c +++ b/src/page.c @@ -159,7 +159,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page) do { head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); + } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); // return if the list is empty if (head == NULL) return; diff --git a/src/segment.c b/src/segment.c index 5af98b1e..55230553 100644 --- a/src/segment.c +++ b/src/segment.c @@ -982,7 +982,7 @@ static mi_segment_t* mi_abandoned_pop(void) { mi_segment_t* anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next); next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } - } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, &ts, next)); + } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); mi_atomic_decrement(&abandoned_readers); // release reader lock if (segment != NULL) { mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, NULL); From a468430772a687085054e8380a94f794bd740f5c Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jul 2020 14:19:30 -0700 Subject: [PATCH 09/11] strengthen memory order of bit operations; insert memory fences --- include/mimalloc-atomic.h | 6 +++--- src/alloc.c | 2 +- src/segment.c | 8 +++++--- test/test-stress.c | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index b9935cb3..cb247b09 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -232,15 +232,15 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t #endif static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) { MI_USING_STD - return atomic_fetch_add_explicit(p, add, memory_order_relaxed); + return atomic_fetch_add_explicit(p, add, memory_order_acq_rel); } static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD - return atomic_fetch_and_explicit(p, x, memory_order_release); + return atomic_fetch_and_explicit(p, x, memory_order_acq_rel); } static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD - return atomic_fetch_or_explicit(p, x, memory_order_release); + return atomic_fetch_or_explicit(p, x, memory_order_acq_rel); } static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { MI_USING_STD diff --git a/src/alloc.c b/src/alloc.c index 62c3c018..e1c54bed 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -323,7 +323,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* const heap = mi_page_heap(page); + mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_read(&page->xheap)); //mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) diff --git a/src/segment.c b/src/segment.c index 55230553..b5fd13d3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -472,7 +472,6 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false; } - _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -629,6 +628,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ return NULL; } } + atomic_thread_fence(memory_order_acq_rel); segment->memid = memid; segment->mem_is_fixed = mem_large; segment->mem_is_committed = commit; @@ -638,6 +638,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ mi_assert_internal(segment->mem_is_fixed ? segment->mem_is_committed : true); if (!pages_still_good) { // zero the segment info (but not the `mem` fields) + atomic_thread_fence(memory_order_release); // with read of `abandoned_next` in `mi_abandoned_pop` ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -791,6 +792,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a uint16_t reserved = page->reserved; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + atomic_thread_fence(memory_order_release); page->capacity = capacity; page->reserved = reserved; page->xblock_size = block_size; @@ -801,7 +803,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a mi_pages_reset_add(segment, page, tld); } - page->capacity = 0; // after reset there can be zero'd now + page->capacity = 0; // after reset these can be zero'd now page->reserved = 0; } @@ -979,7 +981,7 @@ static mi_segment_t* mi_abandoned_pop(void) { do { segment = mi_tagged_segment_ptr(ts); if (segment != NULL) { - mi_segment_t* anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next); + mi_segment_t* anext = mi_atomic_read_ptr(mi_segment_t, &segment->abandoned_next); next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); diff --git a/test/test-stress.c b/test/test-stress.c index 7d8993a0..33ec674b 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -189,7 +189,7 @@ static void test_stress(void) { } } // mi_collect(false); -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(MI_TSAN) if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From ef8e5d18a65f653bbef9cf57694aff37d2e85b9d Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jul 2020 18:00:38 -0700 Subject: [PATCH 10/11] replace atomics with C11/C++ atomics with explicit memory order; passes tsan. 
Issue #130 --- include/mimalloc-atomic.h | 319 ++++++++++++++---------------------- include/mimalloc-internal.h | 8 +- include/mimalloc-types.h | 4 +- src/alloc.c | 14 +- src/arena.c | 18 +- src/bitmap.inc.c | 14 +- src/heap.c | 2 +- src/options.c | 18 +- src/os.c | 26 +-- src/page-queue.c | 6 +- src/page.c | 18 +- src/random.c | 4 +- src/region.c | 50 +++--- src/segment.c | 62 ++++--- 14 files changed, 248 insertions(+), 315 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index cb247b09..e1fdda16 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018, Microsoft Research, Daan Leijen +Copyright (c) 2018,2020 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,150 +8,97 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_ATOMIC_H #define MIMALLOC_ATOMIC_H -// ------------------------------------------------------ +// -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. -// ------------------------------------------------------ +// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// This is why we try to use only `uintptr_t` and `*` as atomic types. +// To gain better insight in the range of used atomics, we use explicitly named memory order operations +// instead of passing the memory order as a parameter. +// ----------------------------------------------------------------------------------------------- #if defined(__cplusplus) +// Use C++ atomics #include -#define _Atomic(tp) std::atomic +#define _Atomic(tp) std::atomic +#define mi_atomic(name) std::atomic_##name +#define mi_memory_order(name) std::memory_order_##name #elif defined(_MSC_VER) +// Use MSVC C wrapper for C11 atomics #define _Atomic(tp) tp #define ATOMIC_VAR_INIT(x) x +#define mi_atomic(name) mi_atomic_##name +#define mi_memory_order(name) mi_memory_order_##name #else +// Use C11 atomics #include +#define mi_atomic(name) atomic_##name +#define mi_memory_order(name) memory_order_##name #endif -// ------------------------------------------------------ -// Atomic operations specialized for mimalloc -// ------------------------------------------------------ +// Various defines for all used memory orders in mimalloc +#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail) -// Atomically add a value; returns the previous value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add); +#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail) -// Atomically "and" a value; returns the previous value. Memory ordering is release. 
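The `mi_atomic(name)` and `mi_memory_order(name)` token-pasting layer above is what lets one set of macros resolve to `std::atomic_*` under C++, to C11 `atomic_*` otherwise, and to the hand-written MSVC wrappers further below. A stripped-down sketch of the same trick for the C11 case, using hypothetical `demo_` names:

#include <stdatomic.h>
#include <stdint.h>

#define demo_atomic(name)        atomic_##name
#define demo_memory_order(name)  memory_order_##name
#define demo_load_acquire(p)     demo_atomic(load_explicit)(p, demo_memory_order(acquire))
#define demo_store_release(p,x)  demo_atomic(store_explicit)(p, x, demo_memory_order(release))

static _Atomic(uintptr_t) demo_word;

static uintptr_t demo_read(void)       { return demo_load_acquire(&demo_word); }
static void      demo_set(uintptr_t x) { demo_store_release(&demo_word, x); }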
-static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x); +#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) +#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) -// Atomically "or" a value; returns the previous value. Memory ordering is release. -static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x); +#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel)) -// Atomically compare and exchange a value; returns `true` if successful. -// May fail spuriously. Memory ordering is release; with relaxed on failure. -static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired); +#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,1) +#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,1) +#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,1) +#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,1) -// Atomically compare and exchange a value; returns `true` if successful. -// May fail spuriously. Memory ordering is acquire-release; with acquire on failure. -static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired); - -// Atomically compare and exchange a value; returns `true` if successful. -// Memory ordering is acquire-release; with acquire on failure. -static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired); - -// Atomically exchange a value. Memory ordering is acquire-release. -static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange); - -// Atomically read a value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_read_relaxed(const _Atomic(uintptr_t)* p); - -// Atomically read a value. Memory ordering is acquire. -static inline uintptr_t mi_atomic_read(const _Atomic(uintptr_t)* p); - -// Atomically write a value. Memory ordering is release. 
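The statistics maximum above is the usual CAS-max loop; a plain C11 rendering, kept separate from the mimalloc macros for clarity (illustrative name, same release-on-success and relaxed-on-failure ordering as the patch):

#include <stdatomic.h>
#include <stdint.h>

static void stat_max_i64(_Atomic(int64_t)* p, int64_t x) {
  int64_t cur = atomic_load_explicit(p, memory_order_relaxed);
  while (cur < x &&
         !atomic_compare_exchange_weak_explicit(p, &cur, x,
                                                memory_order_release,
                                                memory_order_relaxed)) {
    // a failed CAS refreshed `cur`; the loop exits as soon as *p >= x
  }
}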
-static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x); - -// Yield static inline void mi_atomic_yield(void); - -// Atomically add a 64-bit value; returns the previous value. Memory ordering is relaxed. -// Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add); - -// Atomically update `*p` with the maximum of `*p` and `x` as a 64-bit value. -// Returns the previous value. Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x); +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add); +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub); -// Atomically subtract a value; returns the previous value. -static inline uintptr_t mi_atomic_sub(_Atomic(uintptr_t)* p, uintptr_t sub) { - return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub))); +#if defined(__cplusplus) || !defined(_MSC_VER) + +// In C++/C11 atomics we have polymorpic atomics so can use the typed `ptr` variants +// (where `tp` is the type of atomic value) +// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well +#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p) +#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { + return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); +} +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { + int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); + while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ }; } -// Atomically increment a value; returns the incremented result. -static inline uintptr_t mi_atomic_increment(_Atomic(uintptr_t)* p) { - return mi_atomic_add(p, 1); -} -// Atomically decrement a value; returns the decremented result. -static inline uintptr_t mi_atomic_decrement(_Atomic(uintptr_t)* p) { - return mi_atomic_sub(p, 1); -} +#elif defined(_MSC_VER) -// Atomically add a signed value; returns the previous value. -static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)mi_atomic_add((_Atomic(uintptr_t)*)p, (uintptr_t)add); -} - -// Atomically subtract a signed value; returns the previous value. -static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub) { - return (intptr_t)mi_atomic_addi(p,-sub); -} - -// Atomically compare and exchange a void pointer; returns `true` if successful. May fail spuriously. -// Memory order is release. 
(like a write) -static inline bool mi_atomic_cas_weak_voidp(_Atomic(void*)* p, void** expected, void* desired, void* unused1, void* unused2) { - (void)unused1; (void)unused2; // for extra type check - return mi_atomic_cas_weak((_Atomic(uintptr_t)*)p, (uintptr_t*)expected, (uintptr_t)desired); -} - -// Atomically read a void pointer; Memory order is relaxed (i.e. no fence, only atomic). -static inline void* mi_atomic_read_voidp(const _Atomic(void*)* p, void* unused) { - (void)unused; // for extra type check - return (void*)mi_atomic_read((const _Atomic(uintptr_t)*) p); -} - -// Atomically read a void pointer; Memory order is acquire. -static inline void* mi_atomic_read_voidp_relaxed(const _Atomic(void*)*p, void* unused) { - (void)unused; // for extra type check - return (void*)mi_atomic_read_relaxed((const _Atomic(uintptr_t)*) p); -} - -// Atomically write a void pointer; Memory order is acquire. -static inline void mi_atomic_write_voidp(_Atomic(void*)* p, void* exchange, void* unused) { - (void)unused; // for extra type check - mi_atomic_write((_Atomic(uintptr_t)*) p, (uintptr_t)exchange); -} - -// Atomically exchange a void pointer; Memory order is release-acquire. -static inline void* mi_atomic_exchange_voidp(_Atomic(void*)*p, void* exchange, void* unused) { - (void)unused; // for extra type check - return (void*)mi_atomic_exchange((_Atomic(uintptr_t)*) p, (uintptr_t)exchange); -} - -// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. -// Memory order is release. (like a write) -#define mi_atomic_cas_ptr_weak(T,p,expected,desired) \ - mi_atomic_cas_weak_voidp((_Atomic(void*)*)(p), (void**)(expected), desired, *(p), *(expected)) - -// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). -#define mi_atomic_read_ptr_relaxed(T,p) \ - (T*)(mi_atomic_read_voidp_relaxed((const _Atomic(void*)*)(p), *(p))) - -// Atomically read a pointer; Memory order is acquire. -#define mi_atomic_read_ptr(T,p) \ - (T*)(mi_atomic_read_voidp((const _Atomic(void*)*)(p), *(p))) - -// Atomically write a pointer; Memory order is acquire. -#define mi_atomic_write_ptr(T,p,x) \ - mi_atomic_write_voidp((_Atomic(void*)*)(p), x, *(p)) - -// Atomically exchange a pointer value. -#define mi_atomic_exchange_ptr(T,p,exchange) \ - (T*)(mi_atomic_exchange_voidp((_Atomic(void*)*)(p), exchange, *(p))) - - - -#if !defined(__cplusplus) && defined(_MSC_VER) +// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. 
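One convention makes the wrapper below line up with C11: the Interlocked intrinsics, like the C11 fetch operations, return the value the object held before the update. Sketched for the add case (the `MI_64` macro below selects the 64- or 32-bit intrinsic; `v` and `old` are illustrative variables):

//   C11    : old = atomic_fetch_add_explicit(&v, 1, memory_order_relaxed);   // returns the previous value
//   MSVC/C : old = _InterlockedExchangeAdd64((volatile __int64*)&v, 1);      // also returns the previous value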
#define WIN32_LEAN_AND_MEAN #include #include @@ -162,16 +109,29 @@ typedef LONG64 msc_intptr_t; typedef LONG msc_intptr_t; #define MI_64(f) f #endif -static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) { + +typedef enum mi_memory_order_e { + mi_memory_order_relaxed, + mi_memory_order_consume, + mi_memory_order_acquire, + mi_memory_order_release, + mi_memory_order_acq_rel, + mi_memory_order_seq_cst +} mi_memory_order; + +static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)* p, uintptr_t add, mi_memory_order mo) { return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } -static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) { + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)); +} +static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { +static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected)); if (read == *expected) { return true; @@ -181,28 +141,36 @@ static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expect return false; } } -static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { - return mi_atomic_cas_strong(p,expected,desired); +static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { + return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2); } -static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired) { - return mi_atomic_cas_strong(p, expected, desired); -} -static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) { +static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)* p, uintptr_t exchange, mi_memory_order mo) { return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } -static inline uintptr_t mi_atomic_read(_Atomic(uintptr_t) const* p) { - return *p; +static inline mi_atomic_thread_fence(mi_memory_order mo) { + _Atomic(uintptr_t)x = 0; + mi_atomic_exchange_explicit(&x, 1, mo); } -static inline uintptr_t mi_atomic_read_relaxed(_Atomic(uintptr_t) const* p) { +static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { + #if defined(_M_IX86) || defined(_M_X64) return *p; + #else + uintptr_t x = *p; + if (mo > mi_memory_order_relaxed) { + while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; + } + return x; + #endif } -static inline 
void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) { +static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { #if defined(_M_IX86) || defined(_M_X64) *p = x; #else - mi_atomic_exchange(p,x); + mi_atomic_exchange_explicit(p,x,mo); #endif } + +// These are used by the statistics static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) { #ifdef _WIN64 return (int64_t)mi_atomic_addi((int64_t*)p,add); @@ -216,7 +184,6 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int return current; #endif } - static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { @@ -224,63 +191,31 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } -#else -#ifdef __cplusplus -#define MI_USING_STD using namespace std; -#else -#define MI_USING_STD -#endif -static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) { - MI_USING_STD - return atomic_fetch_add_explicit(p, add, memory_order_acq_rel); -} -static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) { - MI_USING_STD - return atomic_fetch_and_explicit(p, x, memory_order_acq_rel); -} -static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) { - MI_USING_STD - return atomic_fetch_or_explicit(p, x, memory_order_acq_rel); -} -static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { - MI_USING_STD - return atomic_compare_exchange_weak_explicit(p, expected, desired, memory_order_release, memory_order_relaxed); -} -static inline bool mi_atomic_cas_weak_acq_rel(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired) { - MI_USING_STD - return atomic_compare_exchange_weak_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire); -} -static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) { - MI_USING_STD - return atomic_compare_exchange_strong_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire); -} -static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) { - MI_USING_STD - return atomic_exchange_explicit(p, exchange, memory_order_acq_rel); -} -static inline uintptr_t mi_atomic_read_relaxed(const _Atomic(uintptr_t)* p) { - MI_USING_STD - return atomic_load_explicit((_Atomic(uintptr_t)*) p, memory_order_relaxed); -} -static inline uintptr_t mi_atomic_read(const _Atomic(uintptr_t)* p) { - MI_USING_STD - return atomic_load_explicit((_Atomic(uintptr_t)*) p, memory_order_acquire); -} -static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) { - MI_USING_STD - return atomic_store_explicit(p, x, memory_order_release); -} -static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { - MI_USING_STD - return atomic_fetch_add_explicit((_Atomic(int64_t)*)p, add, memory_order_relaxed); -} -static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { - MI_USING_STD - int64_t current = atomic_load_explicit((_Atomic(int64_t)*)p, memory_order_relaxed); - while (current < x && !atomic_compare_exchange_weak_explicit((_Atomic(int64_t)*)p, ¤t, x, memory_order_release, memory_order_relaxed)) { /* nothing */ }; -} +// The pointer macros cast to `uintptr_t`. 
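In this C-compilation fallback the `tp` argument of the pointer macros below only affects the cast applied to the result of loads and exchanges; the operands are always routed through `_Atomic(uintptr_t)*`. Call sites therefore read the same as in the C11/C++ path, for example:

//   mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);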
+#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) + #endif + +// Atomically add a signed value; returns the previous value. +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) { + return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add); +} + +// Atomically subtract a signed value; returns the previous value. +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { + return (intptr_t)mi_atomic_addi(p, -sub); +} + +// Yield #if defined(__cplusplus) #include static inline void mi_atomic_yield(void) { diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2dc7e36a..1afdae9c 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -448,21 +448,21 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { - return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); + return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3); } static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { - return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); + return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3); } // Heap access static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { - return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); + return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap)); } static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); - mi_atomic_write(&page->xheap,(uintptr_t)heap); + mi_atomic_store_release(&page->xheap,(uintptr_t)heap); } // Thread free flag helpers diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 17b33bc6..18c415eb 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -259,9 +259,9 @@ typedef struct mi_segment_s { bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields - struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` - struct mi_segment_s* prev; _Atomic(struct mi_segment_s*) abandoned_next; + struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init` + struct mi_segment_s* prev; size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) diff --git a/src/alloc.c b/src/alloc.c index e1c54bed..ebf90ebc 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -307,7 +307,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // Try to put the block on either the page-local thread free list, or the heap delayed free list. mi_thread_free_t tfreex; bool use_delayed; - mi_thread_free_t tfree = mi_atomic_read_relaxed(&page->xthread_free); + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if (mi_unlikely(use_delayed)) { @@ -319,27 +319,27 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_read(&page->xheap)); //mi_page_heap(page); + mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) - mi_block_t* dfree = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { mi_block_set_nextx(heap,block,dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } // and reset the MI_DELAYED_FREEING flag - tfree = mi_atomic_read_relaxed(&page->xthread_free); + tfree = mi_atomic_load_relaxed(&page->xthread_free); do { tfreex = tfree; mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); } } diff --git a/src/arena.c b/src/arena.c index 1c1fc1a0..73a7e704 100644 --- a/src/arena.c +++ b/src/arena.c @@ -105,12 +105,12 @@ static size_t mi_block_count_of_size(size_t size) { static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; - size_t idx = mi_atomic_read(&arena->search_idx); // start from last search + size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around // try to atomically claim a range of bits if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { - mi_atomic_write(&arena->search_idx, idx); // start search from here next time + mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time return true; } } @@ -175,7 +175,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine 
allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages @@ -187,7 +187,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, } // try from another numa node instead.. for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages @@ -228,7 +228,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s size_t bitmap_idx; mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); @@ -254,15 +254,15 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); - mi_assert_internal((uintptr_t)mi_atomic_read_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); + mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - uintptr_t i = mi_atomic_increment(&mi_arena_count); + uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { - mi_atomic_decrement(&mi_arena_count); + mi_atomic_decrement_acq_rel(&mi_arena_count); return false; } - mi_atomic_write_ptr(mi_arena_t,&mi_arenas[i], arena); + mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); return true; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index b9953a4f..2d6df46e 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -121,9 +121,9 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); - uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); + uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]); if ((field & mask) == 0) { // free? - if (mi_atomic_cas_strong(&bitmap[idx], &field, (field|mask))) { + if (mi_atomic_cas_strong_acq_rel(&bitmap[idx], &field, (field|mask))) { // claimed! return true; } @@ -138,7 +138,7 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx { mi_assert_internal(bitmap_idx != NULL); _Atomic(uintptr_t)* field = &bitmap[idx]; - uintptr_t map = mi_atomic_read(field); + uintptr_t map = mi_atomic_load_relaxed(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut // search for 0-bit sequence of length count @@ -158,7 +158,7 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx mi_assert_internal((m >> bitidx) == mask); // no overflow? 
const uintptr_t newmap = map | m; mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(field, &map, newmap)) { // TODO: use strong cas here? + if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? // no success, another thread claimed concurrently.. keep going (with updated `map`) continue; } @@ -204,7 +204,7 @@ static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, s const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); - uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); + uintptr_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); } @@ -217,7 +217,7 @@ static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, siz const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); - uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + uintptr_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } @@ -228,7 +228,7 @@ static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_field const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); + uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]); if (any_ones != NULL) *any_ones = ((field & mask) != 0); return ((field & mask) == mask); } diff --git a/src/heap.c b/src/heap.c index 5d0d4b8a..526c93ed 100644 --- a/src/heap.c +++ b/src/heap.c @@ -143,7 +143,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); + mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); // collect segment caches if (collect >= MI_FORCE) { diff --git a/src/options.c b/src/options.c index 78c01456..85cbf7f6 100644 --- a/src/options.c +++ b/src/options.c @@ -173,11 +173,11 @@ static _Atomic(uintptr_t) out_len; static void mi_out_buf(const char* msg, void* arg) { UNUSED(arg); if (msg==NULL) return; - if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; + if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); if (n==0) return; // claim space - uintptr_t start = mi_atomic_add(&out_len, n); + uintptr_t start = mi_atomic_add_acq_rel(&out_len, n); if (start >= MI_MAX_DELAY_OUTPUT) return; // check bound if (start+n >= MI_MAX_DELAY_OUTPUT) { @@ -189,7 +189,7 @@ static void mi_out_buf(const char* msg, void* arg) { static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) - size_t count = mi_atomic_add(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); + size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? 
MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; @@ -220,14 +220,14 @@ static mi_output_fun* volatile mi_out_default; // = NULL static _Atomic(void*) mi_out_arg; // = NULL static mi_output_fun* mi_out_get_default(void** parg) { - if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); } + if (parg != NULL) { *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - mi_atomic_write_ptr(void,&mi_out_arg, arg); + mi_atomic_store_ptr_release(void,&mi_out_arg, arg); if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } @@ -313,13 +313,13 @@ void _mi_verbose_message(const char* fmt, ...) { static void mi_show_error_message(const char* fmt, va_list args) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > mi_max_error_count) return; + if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > mi_max_error_count) return; + if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args); @@ -365,7 +365,7 @@ static void mi_error_default(int err) { void mi_register_error(mi_error_fun* fun, void* arg) { mi_error_handler = fun; // can be NULL - mi_atomic_write_ptr(void,&mi_error_arg, arg); + mi_atomic_store_ptr_release(void,&mi_error_arg, arg); } void _mi_error_message(int err, const char* fmt, ...) { @@ -376,7 +376,7 @@ void _mi_error_message(int err, const char* fmt, ...) { va_end(args); // and call the error handler which may abort (or return normally) if (mi_error_handler != NULL) { - mi_error_handler(err, mi_atomic_read_ptr(void,&mi_error_arg)); + mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg)); } else { mi_error_default(err); diff --git a/src/os.c b/src/os.c index 0b959a9c..8d0c8237 100644 --- a/src/os.c +++ b/src/os.c @@ -270,11 +270,11 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { - uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_strong(&large_page_try_ok, &try_ok, try_ok - 1); + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); } else { // large OS pages must always reserve and commit. @@ -283,7 +283,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). 
if (p == NULL) { - mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + mi_atomic_store_release(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } @@ -361,13 +361,13 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static _Atomic(uintptr_t) large_page_try_ok; // = 0; - uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); if (!large_only && try_ok > 0) { // If the OS is not configured for large OS pages, or the user does not have // enough permission, the `mmap` will always fail (but it might also fail for other reasons). // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times // to avoid too many failing calls to mmap. - mi_atomic_cas_strong(&large_page_try_ok, &try_ok, try_ok - 1); + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); } else { int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux @@ -407,7 +407,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #endif if (large_only) return p; if (p == NULL) { - mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations + mi_atomic_store_release(&large_page_try_ok, 10); // on error, don't try again for the next N allocations } } } @@ -455,7 +455,7 @@ static mi_decl_cache_align _Atomic(uintptr_t) aligned_base; static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; if ((size%MI_SEGMENT_SIZE) != 0) return NULL; - uintptr_t hint = mi_atomic_add(&aligned_base, size); + uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) uintptr_t init = ((uintptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode @@ -463,8 +463,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif uintptr_t expected = hint + size; - mi_atomic_cas_strong(&aligned_base, &expected, init); - hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all + mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); + hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; return (void*)hint; @@ -760,10 +760,10 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) #else #if defined(MADV_FREE) static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(MADV_FREE); - int err = madvise(start, csize, (int)mi_atomic_read_relaxed(&advice)); + int err = madvise(start, csize, (int)mi_atomic_load_relaxed(&advice)); if (err != 0 && errno == EINVAL && advice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on - mi_atomic_write(&advice, MADV_DONTNEED); + mi_atomic_store_release(&advice, MADV_DONTNEED); err = madvise(start, csize, MADV_DONTNEED); } #elif defined(__wasi__) @@ -970,7 +970,7 @@ static uint8_t* 
mi_os_claim_huge_pages(size_t pages, size_t* total_size) { uintptr_t start = 0; uintptr_t end = 0; - uintptr_t huge_start = mi_atomic_read_relaxed(&mi_huge_start); + uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start); do { start = huge_start; if (start == 0) { @@ -983,7 +983,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); - } while (!mi_atomic_cas_strong(&mi_huge_start, &huge_start, end)); + } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end)); if (total_size != NULL) *total_size = size; return (uint8_t*)start; diff --git a/src/page-queue.c b/src/page-queue.c index ea213019..37719e02 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { heap->page_count--; page->next = NULL; page->prev = NULL; - // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } @@ -274,7 +274,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -341,7 +341,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue for (mi_page_t* page = append->first; page != NULL; page = page->next) { // inline `mi_page_set_heap` to avoid wrong assertion during absorption; // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. - mi_atomic_write(&page->xheap, (uintptr_t)heap); + mi_atomic_store_release(&page->xheap, (uintptr_t)heap); // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a // side effect that it spins until any DELAYED_FREEING is finished. This ensures // that after appending only the new heap will be used for delayed free operations. 
diff --git a/src/page.c b/src/page.c index 92faf9f2..cd96bb90 100644 --- a/src/page.c +++ b/src/page.c @@ -126,7 +126,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid mi_delayed_t old_delay; mi_thread_free_t tfree; do { - tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; + tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { @@ -140,7 +140,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid break; // leave never-delayed flag set } } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex)); + !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); } /* ----------------------------------------------------------- @@ -155,7 +155,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page) { mi_block_t* head; mi_thread_free_t tfreex; - mi_thread_free_t tfree = mi_atomic_read_relaxed(&page->xthread_free); + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); @@ -273,8 +273,8 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { ----------------------------------------------------------- */ void _mi_heap_delayed_free(mi_heap_t* heap) { // take over the list (note: no atomic exchange since it is often NULL) - mi_block_t* block = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); - while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; + mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; // and free them all while(block != NULL) { @@ -283,10 +283,10 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting. 
- mi_block_t* dfree = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { mi_block_set_nextx(heap, block, dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } block = next; } @@ -736,14 +736,14 @@ void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(void,&deferred_arg)); + deferred_free(force, heap->tld->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg)); heap->tld->recurse = false; } } void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; - mi_atomic_write_ptr(void,&deferred_arg, arg); + mi_atomic_store_ptr_release(void,&deferred_arg, arg); } diff --git a/src/random.c b/src/random.c index be95fc46..836f83a2 100644 --- a/src/random.c +++ b/src/random.c @@ -210,11 +210,11 @@ static bool os_random_buf(void* buf, size_t buf_len) { #define GRND_NONBLOCK (1) #endif static _Atomic(uintptr_t) no_getrandom; // = 0 - if (mi_atomic_read(&no_getrandom)==0) { + if (mi_atomic_load_acquire(&no_getrandom)==0) { ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); if (ret >= 0) return (buf_len == (size_t)ret); if (ret != ENOSYS) return false; - mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom + mi_atomic_store_release(&no_getrandom,1); // don't call again, and fall back to /dev/urandom } #endif int flags = O_RDONLY; diff --git a/src/region.c b/src/region.c index d2904687..e916e452 100644 --- a/src/region.c +++ b/src/region.c @@ -123,9 +123,9 @@ static size_t mi_good_commit_size(size_t size) { // Return if a pointer points into a region reserved by us. bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; - size_t count = mi_atomic_read_relaxed(®ions_count); + size_t count = mi_atomic_load_relaxed(®ions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = mi_atomic_read_ptr_relaxed(uint8_t,®ions[i].start); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; @@ -133,7 +133,7 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = mi_atomic_read_ptr(uint8_t,®ion->start); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); mi_assert_internal(start != NULL); return (start + (bit_idx * MI_SEGMENT_SIZE)); } @@ -171,7 +171,7 @@ static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // not out of regions yet? 
- if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; + if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; // try to allocate a fresh region from the OS bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); @@ -184,9 +184,9 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mi_assert_internal(!region_large || region_commit); // claim a fresh slot - const uintptr_t idx = mi_atomic_increment(®ions_count); + const uintptr_t idx = mi_atomic_increment_acq_rel(®ions_count); if (idx >= MI_REGION_MAX) { - mi_atomic_decrement(®ions_count); + mi_atomic_decrement_acq_rel(®ions_count); _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB)); return false; @@ -195,13 +195,13 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // allocated, initialize and claim the initial blocks mem_region_t* r = ®ions[idx]; r->arena_memid = arena_memid; - mi_atomic_write(&r->in_use, 0); - mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_write(&r->reset, 0); + mi_atomic_store_release(&r->in_use, 0); + mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); + mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); + mi_atomic_store_release(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_write_ptr(uint8_t*,&r->start, start); + mi_atomic_store_ptr_release(uint8_t*,&r->start, start); // and share it mi_region_info_t info; @@ -209,7 +209,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, info.x.valid = true; info.x.is_large = region_large; info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_write(&r->info, info.value); // now make it available to others + mi_atomic_store_release(&r->info, info.value); // now make it available to others *region = r; return true; } @@ -221,7 +221,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? mi_region_info_t info; - info.value = mi_atomic_read_relaxed(®ion->info); + info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); if (info.value==0) return false; // numa correct @@ -240,7 +240,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // try all regions for a free slot - const size_t count = mi_atomic_read(®ions_count); + const size_t count = mi_atomic_load_acquire(®ions_count); size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around @@ -280,8 +280,8 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); mi_region_info_t info; - info.value = mi_atomic_read(®ion->info); - uint8_t* start = mi_atomic_read_ptr(uint8_t,®ion->start); + info.value = mi_atomic_load_acquire(®ion->info); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); mi_assert_internal(!(info.x.is_large && !*is_large)); mi_assert_internal(start != NULL); @@ -400,7 +400,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); mi_region_info_t info; - info.value = mi_atomic_read(®ion->info); + info.value = mi_atomic_load_acquire(®ion->info); mi_assert_internal(info.value != 0); void* blocks_start = mi_region_blocks_start(region, bit_idx); mi_assert_internal(blocks_start == p); // not a pointer in our area? @@ -442,21 +442,21 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re -----------------------------------------------------------------------------*/ void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. - uintptr_t rcount = mi_atomic_read_relaxed(®ions_count); + uintptr_t rcount = mi_atomic_load_relaxed(®ions_count); for (size_t i = 0; i < rcount; i++) { mem_region_t* region = ®ions[i]; - if (mi_atomic_read_relaxed(®ion->info) != 0) { + if (mi_atomic_load_relaxed(®ion->info) != 0) { // if no segments used, try to claim the whole region - uintptr_t m = mi_atomic_read_relaxed(®ion->in_use); - while (m == 0 && !mi_atomic_cas_weak(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; + uintptr_t m = mi_atomic_load_relaxed(®ion->in_use); + while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; if (m == 0) { // on success, free the whole region - uint8_t* start = mi_atomic_read_ptr(uint8_t,®ions[i].start); - size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid); - uintptr_t commit = mi_atomic_read_relaxed(®ions[i].commit); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); + size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); + uintptr_t commit = mi_atomic_load_relaxed(®ions[i].commit); memset(®ions[i], 0, sizeof(mem_region_t)); // and release the whole region - mi_atomic_write(®ion->info, 0); + mi_atomic_store_release(®ion->info, 0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); diff --git a/src/segment.c b/src/segment.c index b5fd13d3..2416dadd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -628,17 +628,16 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ return NULL; } } - atomic_thread_fence(memory_order_acq_rel); segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment->mem_is_fixed ? 
segment->mem_is_committed : true); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan if (!pages_still_good) { // zero the segment info (but not the `mem` fields) - atomic_thread_fence(memory_order_release); // with read of `abandoned_next` in `mi_abandoned_pop` ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -792,7 +791,6 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a uint16_t reserved = page->reserved; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); - atomic_thread_fence(memory_order_release); page->capacity = capacity; page->reserved = reserved; page->xblock_size = block_size; @@ -892,69 +890,69 @@ static mi_decl_cache_align _Atomic(uintptr_t) abandoned_readers; // = // Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); - mi_segment_t* anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited); + mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited); do { - mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, anext); - } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, &anext, segment)); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext); + } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment)); } // Move the visited list to the abandoned list. 
static bool mi_abandoned_visited_revisit(void) { // quick check if the visited list is empty - if (mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; + if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; // grab the whole visited list - mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL); + mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL); if (first == NULL) return false; // first try to swap directly if the abandoned list happens to be NULL mi_tagged_segment_t afirst; - mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); if (mi_tagged_segment_ptr(ts)==NULL) { afirst = mi_tagged_segment(first, ts); - if (mi_atomic_cas_strong(&abandoned, &ts, afirst)) return true; + if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) return true; } // find the last element of the visited list: O(n) mi_segment_t* last = first; mi_segment_t* next; - while ((next = mi_atomic_read_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) { + while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) { last = next; } // and atomically prepend to the abandoned list // (no need to increase the readers as we don't access the abandoned segments) - mi_tagged_segment_t anext = mi_atomic_read_relaxed(&abandoned); + mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned); do { - mi_atomic_write_ptr(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext)); + mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext)); afirst = mi_tagged_segment(first, anext); - } while (!mi_atomic_cas_weak(&abandoned, &anext, afirst)); + } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst)); return true; } // Push on the abandoned list. 
static void mi_abandoned_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); mi_tagged_segment_t next; - mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); do { - mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); next = mi_tagged_segment(segment, ts); - } while (!mi_atomic_cas_weak(&abandoned, &ts, next)); + } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next)); } // Wait until there are no more pending reads on segments that used to be in the abandoned list void _mi_abandoned_await_readers(void) { uintptr_t n; do { - n = mi_atomic_read(&abandoned_readers); + n = mi_atomic_load_acquire(&abandoned_readers); if (n != 0) mi_atomic_yield(); } while (n != 0); } @@ -963,7 +961,7 @@ void _mi_abandoned_await_readers(void) { static mi_segment_t* mi_abandoned_pop(void) { mi_segment_t* segment; // Check efficiently if it is empty (or if the visited list needs to be moved) - mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); if (mi_likely(segment == NULL)) { if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL @@ -975,19 +973,19 @@ static mi_segment_t* mi_abandoned_pop(void) { // a segment to be decommitted while a read is still pending, // and a tagged pointer to prevent A-B-A link corruption. 
// (this is called from `region.c:_mi_mem_free` for example) - mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted + mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; - ts = mi_atomic_read(&abandoned); + ts = mi_atomic_load_acquire(&abandoned); do { segment = mi_tagged_segment_ptr(ts); if (segment != NULL) { - mi_segment_t* anext = mi_atomic_read_ptr(mi_segment_t, &segment->abandoned_next); + mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next); next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); - mi_atomic_decrement(&abandoned_readers); // release reader lock + mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock if (segment != NULL) { - mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, NULL); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); } return segment; } @@ -999,7 +997,7 @@ static mi_segment_t* mi_abandoned_pop(void) { static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_expensive(mi_segment_is_valid(segment, tld)); // remove the segment from the free page queue if needed @@ -1013,7 +1011,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; segment->abandoned_visits = 0; - mi_atomic_write_ptr(mi_segment_t, &segment->abandoned_next, NULL); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); mi_abandoned_push(segment); } @@ -1077,7 +1075,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool // Reclaim a segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } segment->thread_id = _mi_thread_id(); @@ -1294,13 +1292,13 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block // huge page segments are always abandoned and can be freed immediately by any thread mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); mi_assert_internal(segment == _mi_page_segment(page)); - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id)==0); // claim it and free mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized. 
// paranoia: if this it the last reference, the cas should always succeed uintptr_t expected_tid = 0; - if (mi_atomic_cas_strong(&segment->thread_id, &expected_tid, heap->thread_id)) { + if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) { mi_block_set_next(page, block, page->free); page->free = block; page->used--; From 76a68cd7af539625dea3ce349aa7742bc02e1ebc Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Sep 2020 09:45:53 -0700 Subject: [PATCH 11/11] bump version to 1.6.6 with new atomics --- include/mimalloc-atomic.h | 4 ++-- include/mimalloc.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index e1fdda16..e3e3186d 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -25,8 +25,8 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_memory_order(name) std::memory_order_##name #elif defined(_MSC_VER) // Use MSVC C wrapper for C11 atomics -#define _Atomic(tp) tp -#define ATOMIC_VAR_INIT(x) x +#define _Atomic(tp) tp +#define ATOMIC_VAR_INIT(x) x #define mi_atomic(name) mi_atomic_##name #define mi_memory_order(name) mi_memory_order_##name #else diff --git a/include/mimalloc.h b/include/mimalloc.h index f44f6d9a..4b0a911f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 164 // major + 2 digits minor +#define MI_MALLOC_VERSION 166 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes
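
For reference, the hunks above all converge on the same C11-style CAS loop: load the current value, build the desired value from it, and let a failed weak CAS reload `expected` before retrying, with release ordering on the successful store (this is the "cas order of expected/desired" change from patch 02). A minimal standalone sketch against plain C11 atomics, assuming nothing beyond <stdatomic.h>; `node_t` and `lifo_push` are illustrative names, not mimalloc API:

  #include <stdatomic.h>
  #include <stddef.h>

  typedef struct node_s { struct node_s* next; } node_t;

  // Lock-free LIFO push in the same shape as the thread_delayed_free and
  // abandoned-list loops: `expected` comes before `desired`, and a failed
  // weak CAS refreshes `expected` so the body only has to re-link the node.
  static void lifo_push(_Atomic(node_t*)* top, node_t* n) {
    node_t* expected = atomic_load_explicit(top, memory_order_relaxed);
    do {
      n->next = expected;   // link to the currently observed head
    } while (!atomic_compare_exchange_weak_explicit(
                 top, &expected, n,
                 memory_order_release,    // publish `n->next` on success
                 memory_order_relaxed));  // on failure, `expected` was reloaded
  }

The same expected/desired order shows up with plain integers in the `large_page_try_ok` backoff in `mi_unix_mmap`, where a single `mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1)` suffices: if it loses the race, another thread already performed the decrement.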
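
The abandoned list itself is guarded two ways, as the comment in `mi_abandoned_pop` notes: a reader count (`abandoned_readers`) that `_mi_abandoned_await_readers` spins on before a segment may be decommitted, and a tag folded into the list head so that a pop/push cycle that reinstalls the same segment pointer still changes the compared word. A rough sketch of that second idea, assuming 64-bit words and heads aligned to at least 16 bytes so the low bits are free (the actual layout of `mi_tagged_segment_t` is not shown in these hunks):

  #include <stdatomic.h>
  #include <stdint.h>

  #define TAG_MASK ((uintptr_t)0xF)  // low alignment bits reused as a version tag

  // Pack a pointer with a counter derived from the previous tagged value;
  // every successful CAS bumps the tag, so a stale `expected` fails even if
  // the same pointer value comes back (the A-B-A case).
  static inline uintptr_t tagged_make(void* p, uintptr_t prev_tagged) {
    uintptr_t tag = (prev_tagged + 1) & TAG_MASK;
    return ((uintptr_t)p & ~TAG_MASK) | tag;
  }

  static inline void* tagged_ptr(uintptr_t tagged) {
    return (void*)(tagged & ~TAG_MASK);
  }

In `mi_abandoned_pop` the loop follows the decoded segment's `abandoned_next` and CASes in the next tagged value built from the old one; because the tag advances on every update, a stale head that happens to hold the same segment pointer no longer compares equal.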