diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 1b306181..55ea3781 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -134,7 +134,8 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); } -static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, int64_t add) { +static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) { + const int64_t add = mi_atomic_load_relaxed((_Atomic(int64_t)*)padd); if (add != 0) { mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); } diff --git a/readme.md b/readme.md index 1ea10883..66d0755e 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,7 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release : `v3.0.2` (beta) (2025-03-06) +Latest release : `v3.0.2` (beta) (2025-03-06). Latest v2 release: `v2.2.2` (2025-03-06). -Latest v1 release: `v1.9.2` (2024-03-06). +Latest v1 release: `v1.9.2` (2025-03-06). @@ -87,12 +87,13 @@ Enjoy! * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`. Add `mi_thread_set_in_threadpool` and `mi_heap_set_numa_affinity` (v3 only). Add vcpkg portfile. - On Windows, use `mimalloc.lib` for the static library, and `mimalloc.dll` for the dynamic override (which used to be `mimalloc-override.dll`) -- and use `mimalloc-dll.lib` for the export library of `mimalloc.dll`. Upgrade redirect to v1.3.2. + Upgrade mimalloc-redirect to v1.3.2. 
`MI_OPT_ARCH` is off by default now but still assumes armv8.1-a on arm64 + for fast atomic operations. * 2025-01-03, `v1.8.9`, `v2.1.9`, `v3.0.1-alpha`: Interim release. Support Windows arm64. New [guarded](#guarded) build that can place OS guard pages behind objects to catch buffer overflows as they occur. Many small fixes: build on Windows arm64, cygwin, riscV, and dragonfly; fix Windows static library initialization to account for thread local destructors (in Rust/C++); macOS tag change; macOS TLS slot fix; improve stats; - consistent mimalloc.dll on Windows (instead of mimalloc-override.dll); fix mimalloc-redirect on Win11 H2; + consistent `mimalloc.dll` on Windows (instead of `mimalloc-override.dll`); fix mimalloc-redirect on Win11 H2; add 0-byte to canary; upstream CPython fixes; reduce .bss size; allow fixed TLS slot on Windows for improved performance. * 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches. diff --git a/src/stats.c b/src/stats.c index b40fa474..27dc69d0 100644 --- a/src/stats.c +++ b/src/stats.c @@ -92,23 +92,23 @@ void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) { // must be thread safe as it is called from stats_merge -static void mi_stat_count_add(mi_stat_count_t* stat, const mi_stat_count_t* src) { +static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) { if (stat==src) return; - mi_atomic_void_addi64_relaxed(&stat->total, src->total); - mi_atomic_void_addi64_relaxed(&stat->current, src->current); + mi_atomic_void_addi64_relaxed(&stat->total, &src->total); + mi_atomic_void_addi64_relaxed(&stat->current, &src->current); // peak scores do really not work across threads .. 
we just add them - mi_atomic_void_addi64_relaxed( &stat->peak, src->peak); + mi_atomic_void_addi64_relaxed( &stat->peak, &src->peak); // or, take the max? // mi_atomic_maxi64_relaxed(&stat->peak, src->peak); } -static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src) { +static void mi_stat_counter_add_mt(mi_stat_counter_t* stat, const mi_stat_counter_t* src) { if (stat==src) return; - if (src->total!=0) { mi_atomic_addi64_relaxed(&stat->total, src->total); } + mi_atomic_void_addi64_relaxed(&stat->total, &src->total); } -#define MI_STAT_COUNT(stat) mi_stat_count_add(&stats->stat, &src->stat); -#define MI_STAT_COUNTER(stat) mi_stat_counter_add(&stats->stat, &src->stat); +#define MI_STAT_COUNT(stat) mi_stat_count_add_mt(&stats->stat, &src->stat); +#define MI_STAT_COUNTER(stat) mi_stat_counter_add_mt(&stats->stat, &src->stat); // must be thread safe as it is called from stats_merge static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { @@ -119,11 +119,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { #if MI_STAT>1 for (size_t i = 0; i <= MI_BIN_HUGE; i++) { - mi_stat_count_add(&stats->malloc_bins[i], &src->malloc_bins[i]); + mi_stat_count_add_mt(&stats->malloc_bins[i], &src->malloc_bins[i]); } #endif for (size_t i = 0; i <= MI_BIN_HUGE; i++) { - mi_stat_count_add(&stats->page_bins[i], &src->page_bins[i]); + mi_stat_count_add_mt(&stats->page_bins[i], &src->page_bins[i]); } } @@ -318,8 +318,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print(&stats->malloc_normal, "normal", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 
1 : -1), out, arg); mi_stat_count_t total = { 0,0,0 }; - mi_stat_count_add(&total, &stats->malloc_normal); - mi_stat_count_add(&total, &stats->malloc_huge); + mi_stat_count_add_mt(&total, &stats->malloc_normal); + mi_stat_count_add_mt(&total, &stats->malloc_huge); mi_stat_print_ex(&total, "total", 1, out, arg, ""); #endif #if MI_STAT>1