From 70115d8b8c0e52d8f196622901639fffed41ff9c Mon Sep 17 00:00:00 2001
From: daanx
Date: Fri, 6 Dec 2024 23:25:53 -0800
Subject: [PATCH] small fixes

---
 include/mimalloc/internal.h | 41 +++++---------------
 src/arena.c                 |  8 ++--
 src/free.c                  | 12 +++---
 src/heap.c                  | 15 +-------
 src/os.c                    |  6 +--
 src/page-queue.c            | 26 ++++---------
 src/page.c                  | 77 ++-----------------------------------
 7 files changed, 34 insertions(+), 151 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index ad7c41c6..28eca4bb 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -540,30 +540,16 @@ static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
 
 static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
   if (heap != NULL) {
-    // mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
     page->heap = heap;
     page->heap_tag = heap->tag;
     mi_atomic_store_release(&page->xthread_id, heap->thread_id);
   }
   else {
-    // mi_atomic_store_release(&page->xheap, (uintptr_t)heap->tld->subproc);
     page->heap = NULL;
     mi_atomic_store_release(&page->xthread_id,0);
   }
 }
 
-//static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
-//  mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
-//  if (heap != NULL) {
-//    mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
-//    page->heap_tag = heap->tag;
-//    mi_atomic_store_release(&page->xthread_id, heap->thread_id);
-//  }
-//  else {
-//    mi_atomic_store_release(&page->xheap, (uintptr_t)mi_page_heap(page)->tld->subproc);
-//    mi_atomic_store_release(&page->xthread_id,0);
-//  }
-//}
 
 // Thread free flag helpers
 static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
@@ -650,24 +636,24 @@ static inline bool mi_page_is_used_at_frac(const mi_page_t* page, uint16_t n) {
 
 static inline bool mi_page_is_abandoned(const mi_page_t* page) {
   // note: the xheap field of an abandoned heap is set to the subproc (for fast reclaim-on-free)
-  return (mi_atomic_load_acquire(&page->xthread_id) <= 1);
+  return (mi_atomic_load_relaxed(&page->xthread_id) <= 1);
 }
 
 static inline bool mi_page_is_abandoned_mapped(const mi_page_t* page) {
-  return (mi_atomic_load_acquire(&page->xthread_id) == 1);
+  return (mi_atomic_load_relaxed(&page->xthread_id) == 1);
 }
 
 static inline void mi_page_set_abandoned_mapped(mi_page_t* page) {
-  mi_atomic_or_acq_rel(&page->xthread_id, (uintptr_t)1);
+  mi_atomic_or_relaxed(&page->xthread_id, (uintptr_t)1);
 }
 
 static inline void mi_page_clear_abandoned_mapped(mi_page_t* page) {
-  mi_atomic_and_acq_rel(&page->xthread_id, ~(uintptr_t)1);
+  mi_atomic_and_relaxed(&page->xthread_id, ~(uintptr_t)1);
 }
 
 
 static inline bool mi_page_is_huge(const mi_page_t* page) {
-  return (page->block_size > MI_LARGE_MAX_OBJ_SIZE || 
+  return (page->block_size > MI_LARGE_MAX_OBJ_SIZE ||
           (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.base < (void*)page));
 }
 
@@ -683,15 +669,6 @@ static inline void _mi_page_unown_unconditional(mi_page_t* page) {
   mi_assert_internal(mi_page_thread_id(page)==0);
   const uintptr_t old = mi_atomic_and_acq_rel(&page->xthread_free, ~((uintptr_t)1));
   mi_assert_internal((old&1)==1); MI_UNUSED(old);
-  /*
-  mi_thread_free_t tf_new;
-  mi_thread_free_t tf_old;
-  do {
-    tf_old = mi_atomic_load_relaxed(&page->xthread_free);
-    mi_assert_internal(mi_tf_is_owned(tf_old));
-    tf_new = mi_tf_create(mi_tf_block(tf_old), false);
-  } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf_old, tf_new));
-  */
 }
 
 
@@ -721,7 +698,7 @@ static inline bool _mi_page_unown(mi_page_t* page) {
     }
     mi_assert_internal(mi_tf_block(tf_old)==NULL);
     tf_new = mi_tf_create(NULL, false);
-  } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf_old, tf_new));
+  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));
   return false;
 }
 
@@ -729,15 +706,15 @@ static inline bool _mi_page_unown(mi_page_t* page) {
 // Page flags
 //-----------------------------------------------------------
 static inline mi_page_flags_t mi_page_flags(const mi_page_t* page) {
-  return mi_atomic_load_acquire(&page->xflags);
+  return mi_atomic_load_relaxed(&page->xflags);
 }
 
 static inline void mi_page_flags_set(mi_page_t* page, bool set, mi_page_flags_t newflag) {
   if (set) {
-    mi_atomic_or_acq_rel(&page->xflags, newflag);
+    mi_atomic_or_relaxed(&page->xflags, newflag);
   }
   else {
-    mi_atomic_and_acq_rel(&page->xflags, ~newflag);
+    mi_atomic_and_relaxed(&page->xflags, ~newflag);
   }
 }
 
diff --git a/src/arena.c b/src/arena.c
index 45697081..8362a31f 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -944,7 +944,7 @@ void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
 bool _mi_arena_contains(const void* p) {
   const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
   for (size_t i = 0; i < max_arena; i++) {
-    mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
     if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) > (const uint8_t*)p) {
       return true;
     }
@@ -1140,7 +1140,7 @@ static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bi
     if (i<10) { buf[k++] = ('0' + (char)i); buf[k++] = ' '; buf[k++] = ' '; }
     else if (i<100) { buf[k++] = ('0' + (char)(i/10)); buf[k++] = ('0' + (char)(i%10)); buf[k++] = ' '; }
     else if (i<1000) { buf[k++] = ('0' + (char)(i/100)); buf[k++] = ('0' + (char)((i%100)/10)); buf[k++] = ('0' + (char)(i%10)); }
-    
+
     for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
       if (j > 0 && (j % 4) == 0) {
         buf[k++] = '\n'; _mi_memset(buf+k,' ',5); k += 5;
@@ -1174,7 +1174,7 @@ void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge)
   //size_t abandoned_total = 0;
   size_t purge_total = 0;
   for (size_t i = 0; i < max_arenas; i++) {
-    mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
     if (arena == NULL) break;
     slice_total += arena->slice_count;
     _mi_output_message("arena %zu: %zu slices (%zu MiB)%s\n", i, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""));
@@ -1324,7 +1324,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_
 static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats) {
   if (_mi_preloading() || mi_arena_purge_delay() <= 0) return;  // nothing will be scheduled
 
-  const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count);
+  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
   if (max_arena == 0) return;
 
   // _mi_error_message(EFAULT, "purging not yet implemented\n");
diff --git a/src/free.c b/src/free.c
index afb23838..ece55599 100644
--- a/src/free.c
+++ b/src/free.c
@@ -70,7 +70,7 @@ static inline void mi_free_block_mt(mi_page_t* page, mi_block_t* block)
   do {
     mi_block_set_next(page, block, mi_tf_block(tf_old));
    tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
-  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));
+  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));  // todo: release is enough?
 
   // and atomically try to collect the page if it was abandoned
   const bool is_owned_now = !mi_tf_is_owned(tf_old);
@@ -207,17 +207,17 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) {
   #endif
 
   // 1. free if the page is free now
-  if (mi_page_all_free(page)) 
+  if (mi_page_all_free(page))
   {
     // first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish)
-    _mi_arena_page_unabandon(page); 
+    _mi_arena_page_unabandon(page);
     // we can free the page directly
     _mi_arena_page_free(page);
     return;
   }
-  
+
   // 2. if the page is not too full, we can try to reclaim it for ourselves
-  if (_mi_option_get_fast(mi_option_reclaim_on_free) != 0 && 
+  if (_mi_option_get_fast(mi_option_reclaim_on_free) != 0 &&
       !mi_page_is_used_at_frac(page,8))
   {
     // the page has still some blocks in use (but not too many)
@@ -234,7 +234,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) {
         (page->subproc == tagheap->tld->subproc) &&  // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
         (_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id))  // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
        )
-    { 
+    {
      if (mi_page_queue(tagheap, page->block_size)->first != NULL) {  // don't reclaim for an block_size we don't use
        // first remove it from the abandoned pages in the arena -- this waits for any readers to finish
        _mi_arena_page_unabandon(page);
diff --git a/src/heap.c b/src/heap.c
index 2ff40930..d687f25e 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -136,24 +136,11 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
     _mi_arena_reclaim_all_abandoned(heap);
   }
 
-  // if abandoning, mark all pages to no longer add to delayed_free
-  //if (collect == MI_ABANDON) {
-  //  mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
-  //}
-
-  // free all current thread delayed blocks.
-  // (if abandoning, after this there are no more thread-delayed references into the pages.)
-  // _mi_heap_delayed_free_all(heap);
-
   // collect retired pages
   _mi_heap_collect_retired(heap, force);
 
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
-  // mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
-
-  // collect segments (purge pages, this can be expensive so don't force on abandonment)
-  // _mi_segments_collect(collect == MI_FORCE, &heap->tld->segments);
 
   // if forced, collect thread data cache on program-exit (or shared library unload)
   if (force && is_main_thread && mi_heap_is_backing(heap)) {
@@ -219,7 +206,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool
     if (poolData != NULL) {
       heap->no_reclaim = true;
     }
-  } 
+  }
   #endif
 
   if (heap == tld->heap_backing) {
diff --git a/src/os.c b/src/os.c
index 156a655b..b05068fd 100644
--- a/src/os.c
+++ b/src/os.c
@@ -113,8 +113,8 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st
   if (err != 0) {
     _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
   }
-  if (still_committed) { 
-    _mi_stat_decrease(&stats->committed, size); 
+  if (still_committed) {
+    _mi_stat_decrease(&stats->committed, size);
   }
   _mi_stat_decrease(&stats->reserved, size);
 }
@@ -556,7 +556,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
       #endif
     }
     end = start + size;
-  } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
+  } while (!mi_atomic_cas_weak_acq_rel(&mi_huge_start, &huge_start, end));
 
   if (total_size != NULL) *total_size = size;
   return (uint8_t*)start;
diff --git a/src/page-queue.c b/src/page-queue.c
index ad616b1d..9e3aaacc 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 #ifndef MI_IN_PAGE_C
 #error "this file should be included from 'page.c'" // include to help an IDE
-#include "mimalloc.h" 
+#include "mimalloc.h"
 #include "mimalloc/internal.h"
 #include "mimalloc/atomic.h"
 #endif
@@ -83,10 +83,10 @@ static inline uint8_t mi_bin(size_t size) {
     #if defined(MI_ALIGN4W)
     if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
     #endif
-    wsize--; 
+    wsize--;
    mi_assert_internal(wsize!=0);
    // find the highest bit position
-    uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize)); 
+    uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
    // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
    // - adjust with 3 because we use do not round the first 8 sizes
    //   which each get an exact bin
@@ -211,8 +211,8 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
 static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_expensive(mi_page_queue_contains(queue, page));
-  mi_assert_internal(mi_page_block_size(page) == queue->block_size || 
-                     (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || 
+  mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
+                     (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
                      (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
   mi_heap_t* heap = mi_page_heap(page);
   if (page->prev != NULL) page->prev->next = page->next;
@@ -227,7 +227,6 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   heap->page_count--;
   page->next = NULL;
   page->prev = NULL;
-  // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL);
   mi_page_set_in_full(page,false);
 }
 
@@ -243,7 +242,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
                      (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
 
   mi_page_set_in_full(page, mi_page_queue_is_full(queue));
-  // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap);
+
   page->next = queue->first;
   page->prev = NULL;
   if (queue->first != NULL) {
@@ -346,8 +345,8 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t*
     page->prev = to->first;
     page->next = next;
     to->first->next = page;
-    if (next != NULL) { 
-      next->prev = page; 
+    if (next != NULL) {
+      next->prev = page;
     }
     else {
      to->last = page;
@@ -385,15 +384,6 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
   // set append pages to new heap and count
   size_t count = 0;
   for (mi_page_t* page = append->first; page != NULL; page = page->next) {
-    /*
-    // inline `mi_page_set_heap` to avoid wrong assertion during absorption;
-    // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive.
-    mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
-    // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a
-    // side effect that it spins until any DELAYED_FREEING is finished. This ensures
-    // that after appending only the new heap will be used for delayed free operations.
-    _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false);
-    */
     mi_page_set_heap(page, heap);
     count++;
   }
diff --git a/src/page.c b/src/page.c
index 056c9506..54e7b539 100644
--- a/src/page.c
+++ b/src/page.c
@@ -132,40 +132,6 @@ bool _mi_page_is_valid(mi_page_t* page) {
 }
 #endif
 
-/*
-void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
-  while (!_mi_page_try_use_delayed_free(page, delay, override_never)) {
-    mi_atomic_yield();
-  }
-}
-
-bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
-  mi_thread_free_t tfreex;
-  mi_delayed_t old_delay;
-  mi_thread_free_t tfree;
-  size_t yield_count = 0;
-  do {
-    tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
-    tfreex = mi_tf_set_delayed(tfree, delay);
-    old_delay = mi_tf_delayed(tfree);
-    if mi_unlikely(old_delay == MI_DELAYED_FREEING) {
-      if (yield_count >= 4) return false;  // give up after 4 tries
-      yield_count++;
-      mi_atomic_yield();  // delay until outstanding MI_DELAYED_FREEING are done.
-      // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
-    }
-    else if (delay == old_delay) {
-      break; // avoid atomic operation if already equal
-    }
-    else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
-      break; // leave never-delayed flag set
-    }
-  } while ((old_delay == MI_DELAYED_FREEING) ||
-           !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
-
-  return true; // success
-}
-*/
 /* -----------------------------------------------------------
   Page collect the `local_free` and `thread_free` lists
 ----------------------------------------------------------- */
@@ -181,7 +147,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
     head = mi_tf_block(tfree);
     if (head == NULL) return; // return if the list is empty
     tfreex = mi_tf_create(NULL,mi_tf_is_owned(tfree)); // set the thread free list to NULL
-  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));
+  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));  // release is enough?
   mi_assert_internal(head != NULL);
 
   // find the tail -- also to get a proper count (without data races)
@@ -334,43 +300,6 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
   return page;
 }
 
-/* -----------------------------------------------------------
-  Do any delayed frees
-  (put there by other threads if they deallocated in a full page)
------------------------------------------------------------ */
-/*
-void _mi_heap_delayed_free_all(mi_heap_t* heap) {
-  while (!_mi_heap_delayed_free_partial(heap)) {
-    mi_atomic_yield();
-  }
-}
-
-// returns true if all delayed frees were processed
-bool _mi_heap_delayed_free_partial(mi_heap_t* heap) {
-  // take over the list (note: no atomic exchange since it is often NULL)
-  mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
-  while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { };
-  bool all_freed = true;
-
-  // and free them all
-  while(block != NULL) {
-    mi_block_t* next = mi_block_nextx(heap,block, heap->keys);
-    // use internal free instead of regular one to keep stats etc correct
-    if (!_mi_free_delayed_block(block)) {
-      // we might already start delayed freeing while another thread has not yet
-      // reset the delayed_freeing flag; in that case delay it further by reinserting the current block
-      // into the delayed free list
-      all_freed = false;
-      mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
-      do {
-        mi_block_set_nextx(heap, block, dfree, heap->keys);
-      } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
-    }
-    block = next;
-  }
-  return all_freed;
-}
-*/
 
 /* -----------------------------------------------------------
   Unfull, abandon, free and retire
@@ -765,7 +694,7 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m
   #if MI_STAT
   size_t count = 0;
   #endif
-  long candidate_limit = 0;          // we reset this on the first candidate to limit the search 
+  long candidate_limit = 0;          // we reset this on the first candidate to limit the search
   long full_page_retain = _mi_option_get_fast(mi_option_full_page_retain);
   mi_page_t* page_candidate = NULL;  // a page with free space
   mi_page_t* page = pq->first;
@@ -777,7 +706,7 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m
     count++;
     #endif
     candidate_limit--;
-    
+
     // collect freed blocks by us and other threads
     _mi_page_free_collect(page, false);