merge from dev-atomic with new atomic interface

This commit is contained in:
daan 2020-09-03 12:13:09 -07:00
commit 03071dec0f
20 changed files with 410 additions and 415 deletions

View file

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
Copyright (c) 2018,2020 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -8,127 +8,99 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_ATOMIC_H
#define MIMALLOC_ATOMIC_H
// ------------------------------------------------------
// --------------------------------------------------------------------------------------------
// Atomics
// We need to be portable between C, C++, and MSVC.
// ------------------------------------------------------
// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode.
// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
// To gain better insight in the range of used atomics, we use explicitly named memory order operations
// instead of passing the memory order as a parameter.
// -----------------------------------------------------------------------------------------------
#if defined(_MSC_VER)
#define _Atomic(tp) tp
#define ATOMIC_VAR_INIT(x) x
#elif defined(__cplusplus)
#if defined(__cplusplus)
// Use C++ atomics
#include <atomic>
#define _Atomic(tp) std::atomic<tp>
#define _Atomic(tp) std::atomic<tp>
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
#elif defined(_MSC_VER)
// Use MSVC C wrapper for C11 atomics
#define _Atomic(tp) tp
#define ATOMIC_VAR_INIT(x) x
#define mi_atomic(name) mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
#else
// Use C11 atomics
#include <stdatomic.h>
#define mi_atomic(name) atomic_##name
#define mi_memory_order(name) memory_order_##name
#endif
// ------------------------------------------------------
// Atomic operations specialized for mimalloc
// ------------------------------------------------------
// Various defines for all used memory orders in mimalloc
#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \
mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)
// Atomically add a value; returns the previous value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add);
#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \
mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)
// Atomically "and" a value; returns the previous value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x);
#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
// Atomically "or" a value; returns the previous value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x);
#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
// Atomically compare and exchange a value; returns `true` if successful.
// May fail spuriously. Memory ordering as release on success, and relaxed on failure.
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,1)
#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,1)
#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,1)
#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,1)
// Atomically compare and exchange a value; returns `true` if successful.
// Memory ordering is acquire-release
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
// Atomically exchange a value. Memory ordering is acquire-release.
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange);
// Atomically read a value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p);
// Atomically read a value. Memory ordering is acquire.
static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p);
// Atomically write a value. Memory ordering is release.
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x);
// Yield
static inline void mi_atomic_yield(void);
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add);
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub);
// Atomically add a 64-bit value; returns the previous value.
// Note: not using _Atomic(int64_t) as it is only used for statistics.
static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add);
// Atomically update `*p` with the maximum of `*p` and `x` as a 64-bit value.
// Returns the previous value. Note: not using _Atomic(int64_t) as it is only used for statistics.
static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x);
#if defined(__cplusplus) || !defined(_MSC_VER)
// Atomically read a 64-bit value
// Note: not using _Atomic(int64_t) as it is only used for statistics.
static inline int64_t mi_atomic_readi64(volatile int64_t* p);
// In C++/C11 atomics we have polymorpic atomics so can use the typed `ptr` variants
// (where `tp` is the type of atomic value)
// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well
#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p)
#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p)
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x)
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
// Atomically subtract a value; returns the previous value.
static inline uintptr_t mi_atomic_sub(volatile _Atomic(uintptr_t)* p, uintptr_t sub) {
return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub)));
// These are used by the statistics
static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
}
static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ };
}
// Atomically increment a value; returns the incremented result.
static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) {
return mi_atomic_add(p, 1);
}
// Atomically decrement a value; returns the decremented result.
static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) {
return mi_atomic_sub(p, 1);
}
#elif defined(_MSC_VER)
// Atomically add a signed value; returns the previous value.
static inline intptr_t mi_atomic_addi(volatile _Atomic(intptr_t)* p, intptr_t add) {
return (intptr_t)mi_atomic_add((volatile _Atomic(uintptr_t)*)p, (uintptr_t)add);
}
// Atomically subtract a signed value; returns the previous value.
static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t sub) {
return (intptr_t)mi_atomic_addi(p,-sub);
}
// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic).
#define mi_atomic_read_ptr_relaxed(T,p) \
(T*)(mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)(p)))
// Atomically read a pointer; Memory order is acquire.
#define mi_atomic_read_ptr(T,p) \
(T*)(mi_atomic_read((const volatile _Atomic(uintptr_t)*)(p)))
// Atomically write a pointer; Memory order is acquire.
#define mi_atomic_write_ptr(T,p,x) \
mi_atomic_write((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x))
// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously.
// Memory order is release. (like a write)
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
#define mi_atomic_cas_ptr_weak(T,p,desired,expected) \
mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected)))
// Atomically compare and exchange a pointer; returns `true` if successful. Memory order is acquire_release.
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
#define mi_atomic_cas_ptr_strong(T,p,desired,expected) \
mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)(p),(uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected)))
// Atomically exchange a pointer value.
#define mi_atomic_exchange_ptr(T,p,exchange) \
(T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange))
#ifdef _MSC_VER
// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <Windows.h>
#include <intrin.h>
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
@ -137,43 +109,71 @@ typedef LONG64 msc_intptr_t;
typedef LONG msc_intptr_t;
#define MI_64(f) f
#endif
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) {
typedef enum mi_memory_order_e {
mi_memory_order_relaxed,
mi_memory_order_consume,
mi_memory_order_acquire,
mi_memory_order_release,
mi_memory_order_acq_rel,
mi_memory_order_seq_cst
} mi_memory_order;
static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)* p, uintptr_t add, mi_memory_order mo) {
return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
}
static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) {
return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) {
return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
if (read == *expected) {
return true;
}
else {
*expected = read;
return false;
}
}
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
return mi_atomic_cas_strong(p,desired,expected);
static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
}
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)* p, uintptr_t exchange, mi_memory_order mo) {
return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
}
static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) {
return *p;
static inline mi_atomic_thread_fence(mi_memory_order mo) {
_Atomic(uintptr_t)x = 0;
mi_atomic_exchange_explicit(&x, 1, mo);
}
static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) {
static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
#if defined(_M_IX86) || defined(_M_X64)
return *p;
#else
uintptr_t x = *p;
if (mo > mi_memory_order_relaxed) {
while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ };
}
return x;
#endif
}
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) {
#if defined(_M_IX86) || defined(_M_X64)
*p = x;
#else
mi_atomic_exchange(p,x);
mi_atomic_exchange_explicit(p,x,mo);
#endif
}
static inline void mi_atomic_yield(void) {
YieldProcessor();
}
static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) {
// These are used by the statistics
static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) {
#ifdef _WIN64
mi_atomic_addi(p,add);
return (int64_t)mi_atomic_addi((int64_t*)p,add);
#else
int64_t current;
int64_t sum;
@ -181,91 +181,52 @@ static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) {
current = *p;
sum = current + add;
} while (_InterlockedCompareExchange64(p, sum, current) != current);
return current;
#endif
}
static inline void mi_atomic_maxi64(volatile _Atomic(int64_t)*p, int64_t x) {
static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
int64_t current;
do {
current = *p;
} while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
}
static inline int64_t mi_atomic_readi64(volatile _Atomic(int64_t)*p) {
#ifdef _WIN64
return *p;
#else
int64_t current;
do {
current = *p;
} while (_InterlockedCompareExchange64(p, current, current) != current);
return current;
#endif
}
// The pointer macros cast to `uintptr_t`.
#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#else
#ifdef __cplusplus
#define MI_USING_STD using namespace std;
#else
#define MI_USING_STD
#endif
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) {
MI_USING_STD
return atomic_fetch_add_explicit(p, add, memory_order_relaxed);
}
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD
return atomic_fetch_and_explicit(p, x, memory_order_acq_rel);
}
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD
return atomic_fetch_or_explicit(p, x, memory_order_acq_rel);
}
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
MI_USING_STD
return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_acquire);
}
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
MI_USING_STD
return atomic_compare_exchange_strong_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_acquire);
}
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
MI_USING_STD
return atomic_exchange_explicit(p, exchange, memory_order_acq_rel);
}
static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p) {
MI_USING_STD
return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed);
}
static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) {
MI_USING_STD
return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire);
}
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD
return atomic_store_explicit(p, x, memory_order_release);
}
static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add) {
MI_USING_STD
atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed);
}
static inline int64_t mi_atomic_readi64(volatile int64_t* p) {
MI_USING_STD
return atomic_load_explicit((volatile _Atomic(int64_t)*) p, memory_order_relaxed);
}
static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x) {
MI_USING_STD
int64_t current;
do {
current = mi_atomic_readi64(p);
} while (current < x && !atomic_compare_exchange_weak_explicit((volatile _Atomic(int64_t)*)p, &current, x, memory_order_acq_rel, memory_order_relaxed));
// Atomically add a signed value; returns the previous value.
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) {
return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add);
}
// Atomically subtract a signed value; returns the previous value.
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
return (intptr_t)mi_atomic_addi(p, -sub);
}
// Yield
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
std::this_thread::yield();
}
#include <thread>
static inline void mi_atomic_yield(void) {
std::this_thread::yield();
}
#elif defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline void mi_atomic_yield(void) {
YieldProcessor();
}
#elif (defined(__GNUC__) || defined(__clang__)) && \
(defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))
#if defined(__x86_64__) || defined(__i386__)
@ -278,17 +239,16 @@ static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x) {
}
#endif
#elif defined(__wasi__)
#include <sched.h>
static inline void mi_atomic_yield(void) {
sched_yield();
}
#include <sched.h>
static inline void mi_atomic_yield(void) {
sched_yield();
}
#else
#include <unistd.h>
static inline void mi_atomic_yield(void) {
sleep(0);
}
#include <unistd.h>
static inline void mi_atomic_yield(void) {
sleep(0);
}
#endif
#endif
#endif // __MIMALLOC_ATOMIC_H

View file

@ -467,21 +467,21 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3);
return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
}
static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3);
return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
}
// Heap access
static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap));
return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
}
static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
mi_atomic_write(&page->xheap,(uintptr_t)heap);
mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
}
// Thread free flag helpers

View file

@ -14,7 +14,9 @@ terms of the MIT license. A copy of the license can be found in the file
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
#ifndef MI_MAX_ALIGN_SIZE
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
#endif
// ------------------------------------------------------
// Variants
@ -160,6 +162,7 @@ typedef enum mi_delayed_e {
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
#if !MI_TSAN
typedef union mi_page_flags_s {
uint8_t full_aligned;
struct {
@ -167,6 +170,16 @@ typedef union mi_page_flags_s {
uint8_t has_aligned : 1;
} x;
} mi_page_flags_t;
#else
// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
typedef union mi_page_flags_s {
uint16_t full_aligned;
struct {
uint8_t in_full;
uint8_t has_aligned;
} x;
} mi_page_flags_t;
#endif
// Thread free list.
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
@ -226,12 +239,12 @@ typedef struct mi_page_s {
uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
uint32_t xblock_size; // size available in each block (always `>0`)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
volatile _Atomic(uintptr_t) xheap;
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(uintptr_t) xheap;
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
#if MI_INTPTR_SIZE==8
@ -277,10 +290,11 @@ typedef struct mi_segment_s {
uintptr_t decommit_mask;
uintptr_t commit_mask;
// from here is zero initialized
struct mi_segment_s* next; // the list of freed segments in the cache
struct mi_segment_s* abandoned_next;
_Atomic(struct mi_segment_s*) abandoned_next;
// from here is zero initialized
struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long)
size_t used; // count of pages in use
@ -291,7 +305,7 @@ typedef struct mi_segment_s {
// layout like this to optimize access in `mi_free`
mi_segment_kind_t kind;
volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment
_Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment
size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
mi_slice_t slices[MI_SLICES_PER_SEGMENT];
} mi_segment_t;
@ -351,7 +365,7 @@ struct mi_heap_s {
mi_tld_t* tld;
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
volatile _Atomic(mi_block_t*) thread_delayed_free;
_Atomic(mi_block_t*) thread_delayed_free;
uintptr_t thread_id; // thread this heap belongs too
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list

View file

@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H
#define MI_MALLOC_VERSION 164 // major + 2 digits minor
#define MI_MALLOC_VERSION 166 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
@ -192,7 +192,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);;
mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc;
@ -256,7 +256,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
// Experimental
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export bool mi_is_redirected() mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;