merge from dev

This commit is contained in:
Daan 2025-03-06 20:15:32 -08:00
commit 26aade92cf
3 changed files with 18 additions and 16 deletions

View file

@ -129,7 +129,8 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
}
static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, int64_t add) {
static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) {
const int64_t add = mi_atomic_load_relaxed((_Atomic(int64_t)*)padd);
if (add != 0) {
mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
}

View file

@ -12,7 +12,7 @@ is a general purpose allocator with excellent [performance](#performance) charac
Initially developed by Daan Leijen for the runtime systems of the
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
Latest release : `v3.0.2` (beta) (2025-03-06)
Latest release : `v3.0.2` (beta) (2025-03-06).
Latest v2 release: `v2.2.2` (2025-03-06).
Latest v1 release: `v1.9.2` (2025-03-06).
@ -87,12 +87,13 @@ Enjoy!
* 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes.
Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`.
Add `mi_thread_set_in_threadpool` and `mi_heap_set_numa_affinity` (v3 only). Add vcpkg portfile.
On Windows, use `mimalloc.lib` for the static library, and `mimalloc.dll` for the dynamic override (which used to be `mimalloc-override.dll`) -- and use `mimalloc-dll.lib` for the export library of `mimalloc.dll`. Upgrade redirect to v1.3.2.
Upgrade mimalloc-redirect to v1.3.2. `MI_OPT_ARCH` is off by default now but still assumes armv8.1-a on arm64
for fast atomic operations.
* 2025-01-03, `v1.8.9`, `v2.1.9`, `v3.0.1-alpha`: Interim release. Support Windows arm64. New [guarded](#guarded) build that can place OS
guard pages behind objects to catch buffer overflows as they occur.
Many small fixes: build on Windows arm64, cygwin, riscV, and dragonfly; fix Windows static library initialization to account for
thread local destructors (in Rust/C++); macOS tag change; macOS TLS slot fix; improve stats;
consistent mimalloc.dll on Windows (instead of mimalloc-override.dll); fix mimalloc-redirect on Win11 H2;
consistent `mimalloc.dll` on Windows (instead of `mimalloc-override.dll`); fix mimalloc-redirect on Win11 H2;
add 0-byte to canary; upstream CPython fixes; reduce .bss size; allow fixed TLS slot on Windows for improved performance.
* 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches
from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches.

View file

@ -63,23 +63,23 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
// must be thread safe as it is called from stats_merge
static void mi_stat_count_add(mi_stat_count_t* stat, const mi_stat_count_t* src) {
static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) {
if (stat==src) return;
mi_atomic_void_addi64_relaxed(&stat->total, src->total);
mi_atomic_void_addi64_relaxed(&stat->current, src->current);
mi_atomic_void_addi64_relaxed(&stat->total, &src->total);
mi_atomic_void_addi64_relaxed(&stat->current, &src->current);
// peak scores do really not work across threads .. we just add them
mi_atomic_void_addi64_relaxed( &stat->peak, src->peak);
mi_atomic_void_addi64_relaxed( &stat->peak, &src->peak);
// or, take the max?
// mi_atomic_maxi64_relaxed(&stat->peak, src->peak);
}
static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src) {
static void mi_stat_counter_add_mt(mi_stat_counter_t* stat, const mi_stat_counter_t* src) {
if (stat==src) return;
if (src->total!=0) { mi_atomic_addi64_relaxed(&stat->total, src->total); }
mi_atomic_void_addi64_relaxed(&stat->total, &src->total);
}
#define MI_STAT_COUNT(stat) mi_stat_count_add(&stats->stat, &src->stat);
#define MI_STAT_COUNTER(stat) mi_stat_counter_add(&stats->stat, &src->stat);
#define MI_STAT_COUNT(stat) mi_stat_count_add_mt(&stats->stat, &src->stat);
#define MI_STAT_COUNTER(stat) mi_stat_counter_add_mt(&stats->stat, &src->stat);
// must be thread safe as it is called from stats_merge
static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
@ -90,11 +90,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
mi_stat_count_add(&stats->malloc_bins[i], &src->malloc_bins[i]);
mi_stat_count_add_mt(&stats->malloc_bins[i], &src->malloc_bins[i]);
}
#endif
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
mi_stat_count_add(&stats->page_bins[i], &src->page_bins[i]);
mi_stat_count_add_mt(&stats->page_bins[i], &src->page_bins[i]);
}
}
@ -290,9 +290,9 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
// mi_stat_print(&stats->malloc_large, "large", (stats->malloc_large_count.total == 0 ? 1 : -1), out, arg);
mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 1 : -1), out, arg);
mi_stat_count_t total = { 0,0,0 };
mi_stat_count_add(&total, &stats->malloc_normal);
mi_stat_count_add_mt(&total, &stats->malloc_normal);
// mi_stat_count_add(&total, &stats->malloc_large);
mi_stat_count_add(&total, &stats->malloc_huge);
mi_stat_count_add_mt(&total, &stats->malloc_huge);
mi_stat_print_ex(&total, "total", 1, out, arg, "");
#endif
#if MI_STAT>1