bring in line with C11 atomics; no volatile, and cas takes its arguments in C11 order (expected, then desired)

This commit is contained in:
daan 2020-07-25 22:52:27 -07:00
parent e27422adca
commit 09ade02429
12 changed files with 170 additions and 188 deletions

View file

@ -27,103 +27,99 @@ terms of the MIT license. A copy of the license can be found in the file
// Atomic operations specialized for mimalloc // Atomic operations specialized for mimalloc
// ------------------------------------------------------ // ------------------------------------------------------
// Atomically add a value; returns the previous value. Memory ordering is relaxed. // Atomically add a value; returns the previous value. Memory ordering is acquire-release.
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add); static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add);
// Atomically "and" a value; returns the previous value. Memory ordering is relaxed. // Atomically "and" a value; returns the previous value. Memory ordering is acquire-release.
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x);
// Atomically "or" a value; returns the previous value. Memory ordering is relaxed. // Atomically "or" a value; returns the previous value. Memory ordering is acquire-release.
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x);
// Atomically compare and exchange a value; returns `true` if successful. // Atomically compare and exchange a value; returns `true` if successful.
// May fail spuriously. Memory ordering as release on success, and relaxed on failure. // May fail spuriously. Memory ordering is acquire-release; with acquire on failure.
// (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired);
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
// Atomically compare and exchange a value; returns `true` if successful. // Atomically compare and exchange a value; returns `true` if successful.
// Memory ordering is acquire-release // Memory ordering is acquire-release; with acquire on failure.
// (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired);
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
// Atomically exchange a value. Memory ordering is acquire-release. // Atomically exchange a value. Memory ordering is acquire-release.
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange); static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange);
// Atomically read a value. Memory ordering is relaxed. // Atomically read a value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p); static inline uintptr_t mi_atomic_read_relaxed(const _Atomic(uintptr_t)* p);
// Atomically read a value. Memory ordering is acquire. // Atomically read a value. Memory ordering is acquire.
static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p); static inline uintptr_t mi_atomic_read(const _Atomic(uintptr_t)* p);
// Atomically write a value. Memory ordering is release. // Atomically write a value. Memory ordering is release.
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x);
// Yield // Yield
static inline void mi_atomic_yield(void); static inline void mi_atomic_yield(void);
// Atomically add a 64-bit value; returns the previous value. // Atomically add a 64-bit value; returns the previous value. Memory ordering is relaxed.
// Note: not using _Atomic(int64_t) as it is only used for statistics. // Note: not using _Atomic(int64_t) as it is only used for statistics.
static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add); static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add);
// Atomically update `*p` with the maximum of `*p` and `x` as a 64-bit value. // Atomically update `*p` with the maximum of `*p` and `x` as a 64-bit value.
// Does not return a value. Note: not using _Atomic(int64_t) as it is only used for statistics. // Does not return a value. Note: not using _Atomic(int64_t) as it is only used for statistics.
static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x); static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x);
// Atomically read a 64-bit value
// Note: not using _Atomic(int64_t) as it is only used for statistics.
static inline int64_t mi_atomic_readi64(volatile int64_t* p);
// Atomically subtract a value; returns the previous value. // Atomically subtract a value; returns the previous value.
static inline uintptr_t mi_atomic_sub(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { static inline uintptr_t mi_atomic_sub(_Atomic(uintptr_t)* p, uintptr_t sub) {
return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub))); return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub)));
} }
// Atomically increment a value; returns the previous value (before the increment). // Atomically increment a value; returns the previous value (before the increment).
static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { static inline uintptr_t mi_atomic_increment(_Atomic(uintptr_t)* p) {
return mi_atomic_add(p, 1); return mi_atomic_add(p, 1);
} }
// Atomically decrement a value; returns the previous value (before the decrement). // Atomically decrement a value; returns the previous value (before the decrement).
static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { static inline uintptr_t mi_atomic_decrement(_Atomic(uintptr_t)* p) {
return mi_atomic_sub(p, 1); return mi_atomic_sub(p, 1);
} }
// Atomically add a signed value; returns the previous value. // Atomically add a signed value; returns the previous value.
static inline intptr_t mi_atomic_addi(volatile _Atomic(intptr_t)* p, intptr_t add) { static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add) {
return (intptr_t)mi_atomic_add((volatile _Atomic(uintptr_t)*)p, (uintptr_t)add); return (intptr_t)mi_atomic_add((_Atomic(uintptr_t)*)p, (uintptr_t)add);
} }
// Atomically subtract a signed value; returns the previous value. // Atomically subtract a signed value; returns the previous value.
static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t sub) { static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub) {
return (intptr_t)mi_atomic_addi(p,-sub); return (intptr_t)mi_atomic_addi(p,-sub);
} }
// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). // Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic).
#define mi_atomic_read_ptr_relaxed(T,p) \ #define mi_atomic_read_ptr_relaxed(T,p) \
(T*)(mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)(p))) (T*)(mi_atomic_read_relaxed((const _Atomic(uintptr_t)*)(p)))
// Atomically read a pointer; Memory order is acquire. // Atomically read a pointer; Memory order is acquire.
#define mi_atomic_read_ptr(T,p) \ #define mi_atomic_read_ptr(T,p) \
(T*)(mi_atomic_read((const volatile _Atomic(uintptr_t)*)(p))) (T*)(mi_atomic_read((const _Atomic(uintptr_t)*)(p)))
// Atomically write a pointer; Memory order is release. // Atomically write a pointer; Memory order is release.
#define mi_atomic_write_ptr(T,p,x) \ #define mi_atomic_write_ptr(T,p,x) \
mi_atomic_write((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x)) mi_atomic_write((_Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x))
static inline bool mi_atomic_cas_weak_voidp(_Atomic(void*)*p, void** expected, void* desired, void* unused) {
(void)(unused);
return mi_atomic_cas_weak((_Atomic(uintptr_t)*)p, (uintptr_t*)expected, (uintptr_t)desired);
}
// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. // Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously.
// Memory order is acquire-release; with acquire on failure. // Memory order is acquire-release; with acquire on failure.
// (Note: expected and desired are in opposite order from atomic_compare_exchange) #define mi_atomic_cas_ptr_weak(T,p,expected,desired) \
#define mi_atomic_cas_ptr_weak(T,p,desired,expected) \ mi_atomic_cas_weak_voidp((_Atomic(void*)*)(p), (void**)(expected), desired, *(expected))
mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected)))
// Atomically compare and exchange a pointer; returns `true` if successful. Memory order is acquire_release.
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
#define mi_atomic_cas_ptr_strong(T,p,desired,expected) \
mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)(p),(uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected)))
// Atomically exchange a pointer value. // Atomically exchange a pointer value.
#define mi_atomic_exchange_ptr(T,p,exchange) \ #define mi_atomic_exchange_ptr(T,p,exchange) \
(T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange)) (T*)mi_atomic_exchange((_Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange))
#if !defined(__cplusplus) && defined(_MSC_VER) #if !defined(__cplusplus) && defined(_MSC_VER)
@ -137,31 +133,38 @@ typedef LONG64 msc_intptr_t;
typedef LONG msc_intptr_t; typedef LONG msc_intptr_t;
#define MI_64(f) f #define MI_64(f) f
#endif #endif
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) {
return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
} }
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) {
return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
} }
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) {
return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
} }
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) {
return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
if (read == *expected) {
return true;
} }
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { else {
return mi_atomic_cas_strong(p,desired,expected); *expected = read;
return false;
} }
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { }
static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) {
return mi_atomic_cas_strong(p,expected,desired);
}
static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) {
return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
} }
static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { static inline uintptr_t mi_atomic_read(_Atomic(uintptr_t) const* p) {
return *p; return *p;
} }
static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { static inline uintptr_t mi_atomic_read_relaxed(_Atomic(uintptr_t) const* p) {
return *p; return *p;
} }
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) {
#if defined(_M_IX86) || defined(_M_X64) #if defined(_M_IX86) || defined(_M_X64)
*p = x; *p = x;
#else #else
@ -171,9 +174,9 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x)
static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) {
YieldProcessor(); YieldProcessor();
} }
static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) { static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) {
#ifdef _WIN64 #ifdef _WIN64
mi_atomic_addi(p,add); return (int64_t)mi_atomic_addi((int64_t*)p,add);
#else #else
int64_t current; int64_t current;
int64_t sum; int64_t sum;
@ -181,84 +184,67 @@ static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) {
current = *p; current = *p;
sum = current + add; sum = current + add;
} while (_InterlockedCompareExchange64(p, sum, current) != current); } while (_InterlockedCompareExchange64(p, sum, current) != current);
return current;
#endif #endif
} }
static inline void mi_atomic_maxi64(volatile _Atomic(int64_t)*p, int64_t x) { static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
int64_t current; int64_t current;
do { do {
current = *p; current = *p;
} while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
} }
static inline int64_t mi_atomic_readi64(volatile _Atomic(int64_t)*p) {
#ifdef _WIN64
return *p;
#else
int64_t current;
do {
current = *p;
} while (_InterlockedCompareExchange64(p, current, current) != current);
return current;
#endif
}
#else #else
#ifdef __cplusplus #ifdef __cplusplus
#define MI_USING_STD using namespace std; #define MI_USING_STD using namespace std;
#else #else
#define MI_USING_STD #define MI_USING_STD
#endif #endif
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { static inline uintptr_t mi_atomic_add(_Atomic(uintptr_t)* p, uintptr_t add) {
MI_USING_STD MI_USING_STD
return atomic_fetch_add_explicit(p, add, memory_order_relaxed); return atomic_fetch_add_explicit(p, add, memory_order_acq_rel);
} }
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { static inline uintptr_t mi_atomic_and(_Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD MI_USING_STD
return atomic_fetch_and_explicit(p, x, memory_order_acq_rel); return atomic_fetch_and_explicit(p, x, memory_order_acq_rel);
} }
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { static inline uintptr_t mi_atomic_or(_Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD MI_USING_STD
return atomic_fetch_or_explicit(p, x, memory_order_acq_rel); return atomic_fetch_or_explicit(p, x, memory_order_acq_rel);
} }
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { static inline bool mi_atomic_cas_weak(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) {
MI_USING_STD MI_USING_STD
return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_acquire); return atomic_compare_exchange_weak_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire);
} }
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { static inline bool mi_atomic_cas_strong(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired) {
MI_USING_STD MI_USING_STD
return atomic_compare_exchange_strong_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_acquire); return atomic_compare_exchange_strong_explicit(p, expected, desired, memory_order_acq_rel, memory_order_acquire);
} }
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { static inline uintptr_t mi_atomic_exchange(_Atomic(uintptr_t)* p, uintptr_t exchange) {
MI_USING_STD MI_USING_STD
return atomic_exchange_explicit(p, exchange, memory_order_acq_rel); return atomic_exchange_explicit(p, exchange, memory_order_acq_rel);
} }
static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p) { static inline uintptr_t mi_atomic_read_relaxed(const _Atomic(uintptr_t)* p) {
MI_USING_STD MI_USING_STD
return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed); return atomic_load_explicit((_Atomic(uintptr_t)*) p, memory_order_relaxed);
} }
static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) { static inline uintptr_t mi_atomic_read(const _Atomic(uintptr_t)* p) {
MI_USING_STD MI_USING_STD
return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire); return atomic_load_explicit((_Atomic(uintptr_t)*) p, memory_order_acquire);
} }
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { static inline void mi_atomic_write(_Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD MI_USING_STD
return atomic_store_explicit(p, x, memory_order_release); return atomic_store_explicit(p, x, memory_order_release);
} }
static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add) { static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
MI_USING_STD MI_USING_STD
atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); return atomic_fetch_add_explicit((_Atomic(int64_t)*)p, add, memory_order_relaxed);
} }
static inline int64_t mi_atomic_readi64(volatile int64_t* p) { static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
MI_USING_STD MI_USING_STD
return atomic_load_explicit((volatile _Atomic(int64_t)*) p, memory_order_relaxed); int64_t current = atomic_load_explicit((_Atomic(int64_t)*)p, memory_order_relaxed);
} while (current < x && !atomic_compare_exchange_weak_explicit((_Atomic(int64_t)*)p, &current, x, memory_order_acq_rel, memory_order_acquire)) { /* nothing */ };
static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x) {
MI_USING_STD
int64_t current;
do {
current = mi_atomic_readi64(p);
} while (current < x && !atomic_compare_exchange_weak_explicit((volatile _Atomic(int64_t)*)p, &current, x, memory_order_acq_rel, memory_order_relaxed));
} }
#if defined(__cplusplus) #if defined(__cplusplus)

View file

@ -222,8 +222,8 @@ typedef struct mi_page_s {
uint32_t xblock_size; // size available in each block (always `>0`) uint32_t xblock_size; // size available in each block (always `>0`)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
volatile _Atomic(uintptr_t) xheap; _Atomic(uintptr_t) xheap;
struct mi_page_s* next; // next page owned by this thread with the same `block_size` struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
@ -262,7 +262,7 @@ typedef struct mi_segment_s {
// layout like this to optimize access in `mi_free` // layout like this to optimize access in `mi_free`
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment
mi_page_kind_t page_kind; // kind of pages: small, large, or huge mi_page_kind_t page_kind; // kind of pages: small, large, or huge
mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
} mi_segment_t; } mi_segment_t;
@ -322,7 +322,7 @@ struct mi_heap_s {
mi_tld_t* tld; mi_tld_t* tld;
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
volatile _Atomic(mi_block_t*) thread_delayed_free; _Atomic(mi_block_t*) thread_delayed_free;
uintptr_t thread_id; // thread this heap belongs too uintptr_t thread_id; // thread this heap belongs too
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list

View file

@ -305,11 +305,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
} }
// Try to put the block on either the page-local thread free list, or the heap delayed free list. // Try to put the block on either the page-local thread free list, or the heap delayed free list.
mi_thread_free_t tfree;
mi_thread_free_t tfreex; mi_thread_free_t tfreex;
bool use_delayed; bool use_delayed;
mi_thread_free_t tfree = mi_atomic_read_relaxed(&page->xthread_free);
do { do {
tfree = mi_atomic_read_relaxed(&page->xthread_free);
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
if (mi_unlikely(use_delayed)) { if (mi_unlikely(use_delayed)) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list // unlikely: this only happens on the first concurrent free in a page that is in the full list
@ -320,7 +319,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
mi_block_set_next(page, block, mi_tf_block(tfree)); mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block); tfreex = mi_tf_set_block(tfree,block);
} }
} while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex));
if (mi_unlikely(use_delayed)) { if (mi_unlikely(use_delayed)) {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
@ -328,19 +327,19 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
mi_assert_internal(heap != NULL); mi_assert_internal(heap != NULL);
if (heap != NULL) { if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree; mi_block_t* dfree = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do { do {
dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
mi_block_set_nextx(heap,block,dfree, heap->keys); mi_block_set_nextx(heap,block,dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, &dfree, block));
} }
// and reset the MI_DELAYED_FREEING flag // and reset the MI_DELAYED_FREEING flag
tfree = mi_atomic_read_relaxed(&page->xthread_free);
do { do {
tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free); tfreex = tfree;
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex));
} }
} }

View file

@ -63,7 +63,7 @@ typedef struct mi_arena_s {
bool is_zero_init; // is the arena zero initialized? bool is_zero_init; // is the arena zero initialized?
bool is_committed; // is the memory committed bool is_committed; // is the memory committed
bool is_large; // large OS page allocated bool is_large; // large OS page allocated
volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed?
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)

View file

@ -30,7 +30,7 @@ and that the sequence must be smaller or equal to the bits in a field.
#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set #define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set
// An atomic bitmap of `uintptr_t` fields // An atomic bitmap of `uintptr_t` fields
typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t; typedef _Atomic(uintptr_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t; typedef mi_bitmap_field_t* mi_bitmap_t;
// A bitmap index is the index of the bit in a bitmap. // A bitmap index is the index of the bit in a bitmap.
@ -123,7 +123,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f
uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]);
if ((field & mask) == 0) { // free? if ((field & mask) == 0) { // free?
if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { if (mi_atomic_cas_strong(&bitmap[idx], &field, (field|mask))) {
// claimed! // claimed!
return true; return true;
} }
@ -137,7 +137,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f
static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{ {
mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(bitmap_idx != NULL);
volatile _Atomic(uintptr_t)* field = &bitmap[idx]; _Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t map = mi_atomic_read(field); uintptr_t map = mi_atomic_read(field);
if (map==MI_BITMAP_FIELD_FULL) return false; // short cut if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
@ -158,9 +158,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
mi_assert_internal((m >> bitidx) == mask); // no overflow? mi_assert_internal((m >> bitidx) == mask); // no overflow?
const uintptr_t newmap = map | m; const uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask); mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? if (!mi_atomic_cas_weak(field, &map, newmap)) { // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going // no success, another thread claimed concurrently.. keep going (with updated `map`)
map = mi_atomic_read(field);
continue; continue;
} }
else { else {

View file

@ -217,7 +217,7 @@ static void mi_out_buf_stderr(const char* msg, void* arg) {
// For now, don't register output from multiple threads. // For now, don't register output from multiple threads.
#pragma warning(suppress:4180) #pragma warning(suppress:4180)
static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* volatile mi_out_default; // = NULL
static volatile _Atomic(void*) mi_out_arg; // = NULL static _Atomic(void*) mi_out_arg; // = NULL
static mi_output_fun* mi_out_get_default(void** parg) { static mi_output_fun* mi_out_get_default(void** parg) {
if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); } if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); }
@ -241,7 +241,7 @@ static void mi_add_stderr_output() {
// -------------------------------------------------------- // --------------------------------------------------------
// Messages, all end up calling `_mi_fputs`. // Messages, all end up calling `_mi_fputs`.
// -------------------------------------------------------- // --------------------------------------------------------
static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings static _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings
// When overriding malloc, we may recurse into mi_vfprintf if an allocation // When overriding malloc, we may recurse into mi_vfprintf if an allocation
// inside the C runtime causes another message. // inside the C runtime causes another message.
@ -339,7 +339,7 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co
// -------------------------------------------------------- // --------------------------------------------------------
static mi_error_fun* volatile mi_error_handler; // = NULL static mi_error_fun* volatile mi_error_handler; // = NULL
static volatile _Atomic(void*) mi_error_arg; // = NULL static _Atomic(void*) mi_error_arg; // = NULL
static void mi_error_default(int err) { static void mi_error_default(int err) {
UNUSED(err); UNUSED(err);

View file

@ -266,7 +266,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
mi_assert_internal(!(large_only && !allow_large)); mi_assert_internal(!(large_only && !allow_large));
static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; static _Atomic(uintptr_t) large_page_try_ok; // = 0;
void* p = NULL; void* p = NULL;
if ((large_only || use_large_os_page(size, try_alignment)) if ((large_only || use_large_os_page(size, try_alignment))
&& allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) {
@ -274,7 +274,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
if (!large_only && try_ok > 0) { if (!large_only && try_ok > 0) {
// if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
// therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); mi_atomic_cas_strong(&large_page_try_ok, &try_ok, try_ok - 1);
} }
else { else {
// large OS pages must always reserve and commit. // large OS pages must always reserve and commit.
@ -360,14 +360,14 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
fd = VM_MAKE_TAG(os_tag); fd = VM_MAKE_TAG(os_tag);
#endif #endif
if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) {
static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; static _Atomic(uintptr_t) large_page_try_ok; // = 0;
uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
if (!large_only && try_ok > 0) { if (!large_only && try_ok > 0) {
// If the OS is not configured for large OS pages, or the user does not have // If the OS is not configured for large OS pages, or the user does not have
// enough permission, the `mmap` will always fail (but it might also fail for other reasons). // enough permission, the `mmap` will always fail (but it might also fail for other reasons).
// Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times
// to avoid too many failing calls to mmap. // to avoid too many failing calls to mmap.
mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); mi_atomic_cas_strong(&large_page_try_ok, &try_ok, try_ok - 1);
} }
else { else {
int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux
@ -449,7 +449,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
// On 64-bit systems, we can do efficient aligned allocation by using // On 64-bit systems, we can do efficient aligned allocation by using
// the 4TiB to 30TiB area to allocate them. // the 4TiB to 30TiB area to allocate them.
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base; static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
// Return a 4MiB aligned address that is probably available // Return a 4MiB aligned address that is probably available
static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
@ -462,7 +462,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif #endif
mi_atomic_cas_strong(&aligned_base, init, hint + size); uintptr_t expected = hint + size;
mi_atomic_cas_strong(&aligned_base, &expected, init);
hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all
} }
if (hint%try_alignment != 0) return NULL; if (hint%try_alignment != 0) return NULL;
@ -969,9 +970,9 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
uintptr_t start = 0; uintptr_t start = 0;
uintptr_t end = 0; uintptr_t end = 0;
uintptr_t expected; uintptr_t huge_start = mi_atomic_read_relaxed(&mi_huge_start);
do { do {
start = expected = mi_atomic_read_relaxed(&mi_huge_start); start = huge_start;
if (start == 0) { if (start == 0) {
// Initialize the start address after the 32TiB area // Initialize the start address after the 32TiB area
start = ((uintptr_t)32 << 40); // 32TiB virtual start address start = ((uintptr_t)32 << 40); // 32TiB virtual start address
@ -982,7 +983,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
} }
end = start + size; end = start + size;
mi_assert_internal(end % MI_SEGMENT_SIZE == 0); mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
} while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); } while (!mi_atomic_cas_strong(&mi_huge_start, &huge_start, end));
if (total_size != NULL) *total_size = size; if (total_size != NULL) *total_size = size;
return (uint8_t*)start; return (uint8_t*)start;

View file

@ -122,11 +122,11 @@ bool _mi_page_is_valid(mi_page_t* page) {
#endif #endif
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
mi_thread_free_t tfree;
mi_thread_free_t tfreex; mi_thread_free_t tfreex;
mi_delayed_t old_delay; mi_delayed_t old_delay;
mi_thread_free_t tfree;
do { do {
tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break this loop and not do a CAS tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
tfreex = mi_tf_set_delayed(tfree, delay); tfreex = mi_tf_set_delayed(tfree, delay);
old_delay = mi_tf_delayed(tfree); old_delay = mi_tf_delayed(tfree);
if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) {
@ -140,7 +140,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
break; // leave never-delayed flag set break; // leave never-delayed flag set
} }
} while ((old_delay == MI_DELAYED_FREEING) || } while ((old_delay == MI_DELAYED_FREEING) ||
!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); !mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex));
} }
/* ----------------------------------------------------------- /* -----------------------------------------------------------
@ -154,13 +154,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
static void _mi_page_thread_free_collect(mi_page_t* page) static void _mi_page_thread_free_collect(mi_page_t* page)
{ {
mi_block_t* head; mi_block_t* head;
mi_thread_free_t tfree;
mi_thread_free_t tfreex; mi_thread_free_t tfreex;
mi_thread_free_t tfree = mi_atomic_read_relaxed(&page->xthread_free);
do { do {
tfree = mi_atomic_read_relaxed(&page->xthread_free);
head = mi_tf_block(tfree); head = mi_tf_block(tfree);
tfreex = mi_tf_set_block(tfree,NULL); tfreex = mi_tf_set_block(tfree,NULL);
} while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } while (!mi_atomic_cas_weak(&page->xthread_free, &tfree, tfreex));
// return if the list is empty // return if the list is empty
if (head == NULL) return; if (head == NULL) return;
@ -273,11 +272,9 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
(put there by other threads if they deallocated in a full page) (put there by other threads if they deallocated in a full page)
----------------------------------------------------------- */ ----------------------------------------------------------- */
void _mi_heap_delayed_free(mi_heap_t* heap) { void _mi_heap_delayed_free(mi_heap_t* heap) {
// take over the list (note: no atomic exchange is it is often NULL) // take over the list (note: no atomic exchange since it is often NULL)
mi_block_t* block; mi_block_t* block = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do { while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ };
block = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
} while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, NULL, block));
// and free them all // and free them all
while(block != NULL) { while(block != NULL) {
@ -286,11 +283,10 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
if (!_mi_free_delayed_block(block)) { if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet // we might already start delayed freeing while another thread has not yet
// reset the delayed_freeing flag; in that case delay it further by reinserting. // reset the delayed_freeing flag; in that case delay it further by reinserting.
mi_block_t* dfree; mi_block_t* dfree = mi_atomic_read_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do { do {
dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
mi_block_set_nextx(heap, block, dfree, heap->keys); mi_block_set_nextx(heap, block, dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, &dfree, block));
} }
block = next; block = next;
} }
@ -734,7 +730,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
----------------------------------------------------------- */ ----------------------------------------------------------- */
static mi_deferred_free_fun* volatile deferred_free = NULL; static mi_deferred_free_fun* volatile deferred_free = NULL;
static volatile _Atomic(void*) deferred_arg; // = NULL static _Atomic(void*) deferred_arg; // = NULL
void _mi_deferred_free(mi_heap_t* heap, bool force) { void _mi_deferred_free(mi_heap_t* heap, bool force) {
heap->tld->heartbeat++; heap->tld->heartbeat++;

View file

@ -200,7 +200,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
#ifndef GRND_NONBLOCK #ifndef GRND_NONBLOCK
#define GRND_NONBLOCK (1) #define GRND_NONBLOCK (1)
#endif #endif
static volatile _Atomic(uintptr_t) no_getrandom; // = 0 static _Atomic(uintptr_t) no_getrandom; // = 0
if (mi_atomic_read(&no_getrandom)==0) { if (mi_atomic_read(&no_getrandom)==0) {
ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
if (ret >= 0) return (buf_len == (size_t)ret); if (ret >= 0) return (buf_len == (size_t)ret);

View file

@ -86,13 +86,13 @@ typedef union mi_region_info_u {
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s { typedef struct mem_region_s {
volatile _Atomic(uintptr_t) info; // mi_region_info_t.value _Atomic(uintptr_t) info; // mi_region_info_t.value
volatile _Atomic(void*) start; // start of the memory area _Atomic(void*) start; // start of the memory area
mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block mi_bitmap_field_t commit; // track if committed per block
mi_bitmap_field_t reset; // track if reset per block mi_bitmap_field_t reset; // track if reset per block
volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
uintptr_t padding; // round to 8 fields uintptr_t padding; // round to 8 fields
} mem_region_t; } mem_region_t;
@ -100,7 +100,7 @@ typedef struct mem_region_s {
static mem_region_t regions[MI_REGION_MAX]; static mem_region_t regions[MI_REGION_MAX];
// Allocated regions // Allocated regions
static volatile _Atomic(uintptr_t) regions_count; // = 0; static _Atomic(uintptr_t) regions_count; // = 0;
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
@ -447,10 +447,8 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
mem_region_t* region = &regions[i]; mem_region_t* region = &regions[i];
if (mi_atomic_read_relaxed(&region->info) != 0) { if (mi_atomic_read_relaxed(&region->info) != 0) {
// if no segments used, try to claim the whole region // if no segments used, try to claim the whole region
uintptr_t m; uintptr_t m = mi_atomic_read_relaxed(&region->in_use);
do { while (m == 0 && !mi_atomic_cas_weak(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
m = mi_atomic_read_relaxed(&region->in_use);
} while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 ));
if (m == 0) { if (m == 0) {
// on success, free the whole region // on success, free the whole region
uint8_t* start = mi_atomic_read_ptr(uint8_t,&regions[i].start); uint8_t* start = mi_atomic_read_ptr(uint8_t,&regions[i].start);

View file

@ -877,15 +877,15 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
// This is a list of visited abandoned pages that were full at the time. // This is a list of visited abandoned pages that were full at the time.
// This list migrates to `abandoned` when that becomes NULL. The use of // This list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited. // this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL
// The abandoned page list (tagged as it supports pop) // The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// We also maintain a count of current readers of the abandoned list // We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might // in order to prevent resetting/decommitting segment memory if it might
// still be read. // still be read.
static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0 static mi_decl_cache_align _Atomic(uintptr_t) abandoned_readers; // = 0
// Push on the visited list // Push on the visited list
static void mi_abandoned_visited_push(mi_segment_t* segment) { static void mi_abandoned_visited_push(mi_segment_t* segment) {
@ -893,11 +893,10 @@ static void mi_abandoned_visited_push(mi_segment_t* segment) {
mi_assert_internal(segment->abandoned_next == NULL); mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_internal(segment->used > 0); mi_assert_internal(segment->used > 0);
mi_segment_t* anext; mi_segment_t* anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited);
do { do {
anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited);
segment->abandoned_next = anext; segment->abandoned_next = anext;
} while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext)); } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, &anext, segment));
} }
// Move the visited list to the abandoned list. // Move the visited list to the abandoned list.
@ -911,11 +910,11 @@ static bool mi_abandoned_visited_revisit(void)
if (first == NULL) return false; if (first == NULL) return false;
// first try to swap directly if the abandoned list happens to be NULL // first try to swap directly if the abandoned list happens to be NULL
const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
mi_tagged_segment_t afirst; mi_tagged_segment_t afirst;
mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
if (mi_tagged_segment_ptr(ts)==NULL) { if (mi_tagged_segment_ptr(ts)==NULL) {
afirst = mi_tagged_segment(first, ts); afirst = mi_tagged_segment(first, ts);
if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true; if (mi_atomic_cas_strong(&abandoned, &ts, afirst)) return true;
} }
// find the last element of the visited list: O(n) // find the last element of the visited list: O(n)
@ -926,12 +925,11 @@ static bool mi_abandoned_visited_revisit(void)
// and atomically prepend to the abandoned list // and atomically prepend to the abandoned list
// (no need to increase the readers as we don't access the abandoned segments) // (no need to increase the readers as we don't access the abandoned segments)
mi_tagged_segment_t anext; mi_tagged_segment_t anext = mi_atomic_read_relaxed(&abandoned);
do { do {
anext = mi_atomic_read_relaxed(&abandoned);
last->abandoned_next = mi_tagged_segment_ptr(anext); last->abandoned_next = mi_tagged_segment_ptr(anext);
afirst = mi_tagged_segment(first, anext); afirst = mi_tagged_segment(first, anext);
} while (!mi_atomic_cas_weak(&abandoned, afirst, anext)); } while (!mi_atomic_cas_weak(&abandoned, &anext, afirst));
return true; return true;
} }
@ -941,13 +939,12 @@ static void mi_abandoned_push(mi_segment_t* segment) {
mi_assert_internal(segment->abandoned_next == NULL); mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_internal(segment->used > 0); mi_assert_internal(segment->used > 0);
mi_tagged_segment_t ts;
mi_tagged_segment_t next; mi_tagged_segment_t next;
mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
do { do {
ts = mi_atomic_read_relaxed(&abandoned);
segment->abandoned_next = mi_tagged_segment_ptr(ts); segment->abandoned_next = mi_tagged_segment_ptr(ts);
next = mi_tagged_segment(segment, ts); next = mi_tagged_segment(segment, ts);
} while (!mi_atomic_cas_weak(&abandoned, next, ts)); } while (!mi_atomic_cas_weak(&abandoned, &ts, next));
} }
// Wait until there are no more pending reads on segments that used to be in the abandoned list // Wait until there are no more pending reads on segments that used to be in the abandoned list
@ -977,13 +974,13 @@ static mi_segment_t* mi_abandoned_pop(void) {
// (this is called from `memory.c:_mi_mem_free` for example) // (this is called from `memory.c:_mi_mem_free` for example)
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0; mi_tagged_segment_t next = 0;
do {
ts = mi_atomic_read(&abandoned); ts = mi_atomic_read(&abandoned);
do {
segment = mi_tagged_segment_ptr(ts); segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) { if (segment != NULL) {
next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
} }
} while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts)); } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, &ts, next));
mi_atomic_decrement(&abandoned_readers); // release reader lock mi_atomic_decrement(&abandoned_readers); // release reader lock
if (segment != NULL) { if (segment != NULL) {
segment->abandoned_next = NULL; segment->abandoned_next = NULL;
@ -1298,7 +1295,8 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
// claim it and free // claim it and free
mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized. mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized.
// paranoia: if this is the last reference, the cas should always succeed // paranoia: if this is the last reference, the cas should always succeed
if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { uintptr_t expected_tid = 0;
if (mi_atomic_cas_strong(&segment->thread_id, &expected_tid, heap->thread_id)) {
mi_block_set_next(page, block, page->free); mi_block_set_next(page, block, page->free);
page->free = block; page->free = block;
page->used--; page->used--;
@ -1315,6 +1313,11 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
mi_segments_track_size((long)segment->segment_size, &tld->segments); mi_segments_track_size((long)segment->segment_size, &tld->segments);
_mi_segment_page_free(page, true, &tld->segments); _mi_segment_page_free(page, true, &tld->segments);
} }
#if (MI_DEBUG!=0)
else {
mi_assert_internal(false);
}
#endif
} }
/* ----------------------------------------------------------- /* -----------------------------------------------------------

View file

@ -26,13 +26,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
if (mi_is_in_main(stat)) if (mi_is_in_main(stat))
{ {
// add atomically (for abandoned pages) // add atomically (for abandoned pages)
mi_atomic_addi64(&stat->current,amount); int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_maxi64(&stat->peak, mi_atomic_readi64(&stat->current)); mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
if (amount > 0) { if (amount > 0) {
mi_atomic_addi64(&stat->allocated,amount); mi_atomic_addi64_relaxed(&stat->allocated,amount);
} }
else { else {
mi_atomic_addi64(&stat->freed, -amount); mi_atomic_addi64_relaxed(&stat->freed, -amount);
} }
} }
else { else {
@ -50,8 +50,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
if (mi_is_in_main(stat)) { if (mi_is_in_main(stat)) {
mi_atomic_addi64( &stat->count, 1 ); mi_atomic_addi64_relaxed( &stat->count, 1 );
mi_atomic_addi64( &stat->total, (int64_t)amount ); mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount );
} }
else { else {
stat->count++; stat->count++;
@ -71,17 +71,17 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) {
if (stat==src) return; if (stat==src) return;
if (src->allocated==0 && src->freed==0) return; if (src->allocated==0 && src->freed==0) return;
mi_atomic_addi64( &stat->allocated, src->allocated * unit); mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit);
mi_atomic_addi64( &stat->current, src->current * unit); mi_atomic_addi64_relaxed( &stat->current, src->current * unit);
mi_atomic_addi64( &stat->freed, src->freed * unit); mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit);
// peak scores do not work across threads.. // peak scores do not work across threads..
mi_atomic_addi64( &stat->peak, src->peak * unit); mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit);
} }
static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) {
if (stat==src) return; if (stat==src) return;
mi_atomic_addi64( &stat->total, src->total * unit); mi_atomic_addi64_relaxed( &stat->total, src->total * unit);
mi_atomic_addi64( &stat->count, src->count * unit); mi_atomic_addi64_relaxed( &stat->count, src->count * unit);
} }
// must be thread safe as it is called from stats_merge // must be thread safe as it is called from stats_merge