merge from dev-abandon
commit 8fb51aae4d
7 changed files with 294 additions and 327 deletions
include/mimalloc.h

@@ -345,6 +345,7 @@ typedef enum mi_option_e {
  mi_option_arena_reserve,              // initial memory size in KiB for arena reservation (1GiB on 64-bit)
  mi_option_arena_purge_mult,
  mi_option_purge_extend_delay,
+ mi_option_abandoned_reclaim_on_free,  // reclaim abandoned segments on a free
  _mi_option_last,
  // legacy option names
  mi_option_large_os_pages = mi_option_allow_large_os_pages,
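As a usage sketch (not part of this diff): the new option can be toggled like any other mimalloc option, either from code via the public option API or through mimalloc's usual `MIMALLOC_` environment-variable convention; the exact variable name below is an assumption derived from that convention.

#include <mimalloc.h>

int main(void) {
  // enable reclaiming abandoned segments on free from code...
  mi_option_enable(mi_option_abandoned_reclaim_on_free);
  // ...or equivalently via the environment, set before process start:
  //   MIMALLOC_ABANDONED_RECLAIM_ON_FREE=1
  void* p = mi_malloc(64);
  mi_free(p);
  return 0;
}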
include/mimalloc/internal.h

@@ -88,7 +88,7 @@ void _mi_thread_data_collect(void);

// os.c
void  _mi_os_init(void);                             // called from process init
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
void  _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats);
void  _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats);
@@ -125,6 +125,10 @@ bool _mi_arena_contains(const void* p);
void  _mi_arena_collect(bool force_purge, mi_stats_t* stats);
void  _mi_arena_unsafe_destroy_all(mi_stats_t* stats);

+bool  _mi_arena_segment_clear_abandoned(mi_memid_t memid);
+void  _mi_arena_segment_mark_abandoned(mi_memid_t memid);
+mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_id_t* current_id, size_t* current_idx);
+
// "segment-map.c"
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);
@@ -146,6 +150,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size);
void  _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void  _mi_abandoned_await_readers(void);
void  _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
+bool  _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);

// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
@@ -427,7 +432,7 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {

// Segment belonging to a page
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
  mi_segment_t* segment = _mi_ptr_segment(page);
  mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
  return segment;
}
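The lookup above relies on segments being MI_SEGMENT_SIZE-aligned (asserted later in mi_segment_os_alloc: `(uintptr_t)segment % MI_SEGMENT_SIZE == 0`), so `_mi_ptr_segment` can recover the owning segment by masking low pointer bits. A hedged sketch of that trick (illustrative, not mimalloc's exact code):

#include <stdint.h>

static inline mi_segment_t* ptr_segment_sketch(const void* p) {
  // round down to the enclosing MI_SEGMENT_SIZE-aligned boundary
  return (mi_segment_t*)((uintptr_t)p & ~((uintptr_t)MI_SEGMENT_SIZE - 1));
}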
@@ -729,12 +734,12 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);

#define mi_commit_mask_foreach(cm,idx,count) \
  idx = 0; \
  while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) {

#define mi_commit_mask_foreach_end() \
  idx += count; \
  }


/* -----------------------------------------------------------
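A usage sketch of this macro pair (assuming an already-initialized commit mask `mask`): the macros expand to a `while` loop over the runs of set bits, so the body executes once per run.

size_t idx;
size_t count;
size_t total = 0;
mi_commit_mask_foreach(&mask, idx, count)
  total += count;   // a run of `count` set bits starts at bit `idx`
mi_commit_mask_foreach_end()
// `total` now holds the number of committed MI_COMMIT_SIZE chunks in `mask`.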
include/mimalloc/types.h

@@ -181,7 +181,7 @@ typedef int32_t mi_ssize_t;

#define MI_SMALL_OBJ_SIZE_MAX    (MI_SMALL_PAGE_SIZE/4)   // 8KiB on 64-bit
#define MI_MEDIUM_OBJ_SIZE_MAX   (MI_MEDIUM_PAGE_SIZE/4)  // 128KiB on 64-bit
#define MI_MEDIUM_OBJ_WSIZE_MAX  (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
#define MI_LARGE_OBJ_SIZE_MAX    (MI_SEGMENT_SIZE/2)      // 32MiB on 64-bit
#define MI_LARGE_OBJ_WSIZE_MAX   (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
@@ -199,10 +199,10 @@ typedef int32_t mi_ssize_t;
#define MI_HUGE_BLOCK_SIZE   ((uint32_t)(2*MI_GiB))

// blocks up to this size are always allocated aligned
#define MI_MAX_ALIGN_GUARANTEE  (8*MI_MAX_ALIGN_SIZE)

// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_ALIGNMENT_MAX   (MI_SEGMENT_SIZE >> 1)


// ------------------------------------------------------
@@ -291,7 +291,7 @@ typedef uintptr_t mi_thread_free_t;
typedef struct mi_page_s {
  // "owned" by the segment
  uint32_t slice_count;       // slices in this page (0 if not a page)
  uint32_t slice_offset;      // distance from the actual page data slice (0 if a page)
  uint8_t  is_committed : 1;  // `true` if the page virtual memory is committed
  uint8_t  is_zero_init : 1;  // `true` if the page was initially zero initialized
@@ -345,7 +345,7 @@ typedef enum mi_segment_kind_e {
// A segment holds a commit mask where a bit is set if
// the corresponding MI_COMMIT_SIZE area is committed.
// The MI_COMMIT_SIZE must be a multiple of the slice
// size. If it is equal we have the most fine grained
// decommit (but setting it higher can be more efficient).
// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will
// be committed in one go which can be set higher than
@@ -353,9 +353,9 @@ typedef enum mi_segment_kind_e {
// is still tracked in fine-grained MI_COMMIT_SIZE chunks)
// ------------------------------------------------------

#define MI_MINIMAL_COMMIT_SIZE      (1*MI_SEGMENT_SLICE_SIZE)
#define MI_COMMIT_SIZE              (MI_SEGMENT_SLICE_SIZE)            // 64KiB
#define MI_COMMIT_MASK_BITS         (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
#define MI_COMMIT_MASK_FIELD_BITS    MI_SIZE_BITS
#define MI_COMMIT_MASK_FIELD_COUNT  (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS)
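Checking these definitions against the size comments elsewhere in this header (a consistency sketch; the 32MiB segment size and 64KiB slice size are taken from the `MI_LARGE_OBJ_SIZE_MAX` and `MI_COMMIT_SIZE` comments above, not restated by this diff):

#define SEGMENT_SIZE  (32u*1024u*1024u)   // assumed: 32MiB on 64-bit
#define COMMIT_SIZE   (64u*1024u)         // assumed: one 64KiB slice
_Static_assert(SEGMENT_SIZE / COMMIT_SIZE == 512, "512 commit-mask bits per segment");
_Static_assert(512 / 64 == 8, "8 size_t mask fields on a 64-bit platform");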
@@ -424,15 +424,13 @@ typedef struct mi_segment_s {
  mi_commit_mask_t purge_mask;
  mi_commit_mask_t commit_mask;

- _Atomic(struct mi_segment_s*) abandoned_next;
-
  // from here is zero initialized
  struct mi_segment_s* next;   // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)

  size_t abandoned;            // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
  size_t abandoned_visits;     // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
  size_t used;                 // count of pages in use
  uintptr_t cookie;            // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`

  size_t segment_slices;       // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
  size_t segment_info_slices;  // initial slices we are using for segment info and possible guard pages.
@@ -503,7 +501,7 @@ struct mi_heap_s {
  mi_page_queue_t pages[MI_BIN_FULL + 1];    // queue of pages for each size class (or "bin")
  _Atomic(mi_block_t*) thread_delayed_free;
  mi_threadid_t thread_id;                   // thread this heap belongs to
  mi_arena_id_t arena_id;                    // arena id if the heap belongs to a specific arena (or 0)
  uintptr_t cookie;                          // random cookie to verify pointers (see `_mi_ptr_cookie`)
  uintptr_t keys[2];                         // two random keys used to encode the `thread_delayed_free` list
  mi_random_ctx_t random;                    // random number context used for secure allocation
30 src/alloc.c
@@ -58,7 +58,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
    }
    else {
      _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE);
    }
  }
}

#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
@@ -113,7 +113,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
  if (size == 0) { size = sizeof(void*); }
#endif
  mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
  void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
  mi_track_malloc(p,size,zero);
#if MI_STAT>1
  if (p != NULL) {
@@ -346,7 +346,7 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
  MI_UNUSED(block);
#endif
  mi_heap_t* const heap = mi_heap_get_default();
@@ -354,7 +354,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT>1)
  const size_t usize = mi_page_usable_size_of(page, block);
  mi_heap_stat_decrease(heap, malloc, usize);
#endif
  if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) {
    mi_heap_stat_decrease(heap, normal, bsize);
#if (MI_STAT > 1)
@@ -366,7 +366,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
  }
  else {
    mi_heap_stat_decrease(heap, huge, bsize);
  }
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
@@ -401,13 +401,25 @@ static void mi_stat_huge_free(const mi_page_t* page) {
// multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
+  // first see if the segment was abandoned and we can reclaim it
+  mi_segment_t* const segment = _mi_page_segment(page);
+  if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) &&
+      mi_atomic_load_relaxed(&segment->thread_id) == 0)
+  {
+    // the segment is abandoned, try to reclaim it into our heap
+    if (_mi_segment_attempt_reclaim(mi_prim_get_default_heap(), segment)) {
+      mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
+      mi_free(block);  // recursively free as now it will be a local free in our heap
+      return;
+    }
+  }
+
  // The padding check may access the non-thread-owned page for the key values.
  // That is safe as these are constant and the page won't be freed (as the block is not freed yet).
  mi_check_padding(page, block);
  _mi_padding_shrink(page, block, sizeof(mi_block_t));  // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection

  // huge page segments are always abandoned and can be freed immediately
- mi_segment_t* segment = _mi_page_segment(page);
  if (segment->kind == MI_SEGMENT_HUGE) {
#if MI_HUGE_PAGE_ABANDON
@@ -421,7 +433,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
    _mi_segment_huge_page_reset(segment, page, block);
#endif
  }

#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN   // note: when tracking, cannot use mi_usable_size with multi-threading
  if (segment->kind != MI_SEGMENT_HUGE) {           // not for huge segments as we just reset the content
    memset(block, MI_DEBUG_FREED, mi_usable_size(block));
191 src/arena.c
@@ -13,7 +13,7 @@ threads and need to be accessed using atomic operations.

Arenas are used for huge OS page (1GiB) reservations or for reserving
OS memory upfront, which can improve performance or is sometimes needed
on embedded devices. We can also employ this with WASI or `sbrk` systems
to reserve large arenas upfront and be able to reuse the memory more effectively.

The arena allocation needs to be thread safe and we use an atomic bitmap to allocate.
@@ -48,13 +48,14 @@ typedef struct mi_arena_s {
  size_t meta_size;                     // size of the arena structure itself (including its bitmaps)
  mi_memid_t meta_memid;                // memid of the arena structure itself (OS or static allocation)
  int numa_node;                        // associated NUMA node
  bool exclusive;                       // only allow allocations if specifically for this arena
  bool is_large;                        // memory area consists of large- or huge OS pages (always committed)
  _Atomic(size_t) search_idx;           // optimization to start the search for free blocks
  _Atomic(mi_msecs_t) purge_expire;     // expiration time when blocks should be decommitted from `blocks_decommit`.
  mi_bitmap_field_t* blocks_dirty;      // are the blocks potentially non-zero?
  mi_bitmap_field_t* blocks_committed;  // are the blocks committed? (can be NULL for memory that cannot be decommitted)
  mi_bitmap_field_t* blocks_purge;      // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
+ mi_bitmap_field_t* blocks_abandoned;  // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
  mi_bitmap_field_t  blocks_inuse[1];   // in-place bitmap of in-use blocks (of size `field_count`)
} mi_arena_t;
@@ -94,7 +95,7 @@ bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
    return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
  }
  else {
-   return mi_arena_id_is_suitable(0, false, request_arena_id);
+   return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id);
  }
}
@@ -103,7 +104,7 @@ bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) {
}

/* -----------------------------------------------------------
  Arena allocations get a (currently) 16-bit memory id where the
  lower 8 bits are the arena id, and the upper bits the block index.
----------------------------------------------------------- */
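A hedged sketch of what such a packing scheme looks like (illustrative only; the helper names and exact widths below are assumptions, mimalloc's real encoding lives in its `mi_memid_create_arena`/`mi_arena_memid_indices` helpers):

#include <stdint.h>

static inline uint16_t pack_arena_memid(uint8_t arena_id, uint8_t block_index) {
  return (uint16_t)arena_id | ((uint16_t)block_index << 8);   // low 8 bits: arena id
}
static inline void unpack_arena_memid(uint16_t m, uint8_t* arena_id, uint8_t* block_index) {
  *arena_id    = (uint8_t)(m & 0xFF);
  *block_index = (uint8_t)(m >> 8);                           // upper bits: block index
}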
@@ -211,7 +212,7 @@ static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
  size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx);   // start from last search; ok to be relaxed as the exact start does not matter
  if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
    mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx));  // start search from found location next time around
    return true;
  };
  return false;
@@ -231,7 +232,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, ...)
  mi_bitmap_index_t bitmap_index;
  if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL;

  // claimed it!
  void* p = mi_arena_block_start(arena, bitmap_index);
  *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index);
  memid->is_pinned = arena->memid.is_pinned;
@@ -271,21 +272,21 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, ...)
    // no need to commit, but check if already fully committed
    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
  }

  return p;
}

// allocate in a specific arena
static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment,
                                      bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
{
  MI_UNUSED_RELEASE(alignment);
  mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
  const size_t bcount = mi_block_count_of_size(size);
  const size_t arena_index = mi_arena_id_index(arena_id);
  mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
  mi_assert_internal(size <= mi_arena_block_size(bcount));

  // Check arena suitability
  mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
  if (arena == NULL) return NULL;
@@ -305,7 +306,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, ...)


// allocate from an arena with fallback to the OS
static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
                                                 bool commit, bool allow_large,
                                                 mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
{
@@ -313,9 +314,9 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, ...)
  mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
  if mi_likely(max_arena == 0) return NULL;

  if (req_arena_id != _mi_arena_id_none()) {
    // try a specific arena if requested
    if (mi_arena_id_index(req_arena_id) < max_arena) {
      void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
      if (p != NULL) return p;
@@ -323,7 +324,7 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, ...)
  }
  else {
    // try numa affine allocation
    for (size_t i = 0; i < max_arena; i++) {
      void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
      if (p != NULL) return p;
    }
@@ -351,22 +352,22 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id)
  size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
  if (arena_reserve == 0) return false;

  if (!_mi_os_has_virtual_reserve()) {
    arena_reserve = arena_reserve/4;  // be conservative if virtual reserve is not supported (for some embedded systems for example)
  }
  arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
  if (arena_count >= 8 && arena_count <= 128) {
    arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve;  // scale up the arena sizes exponentially
  }
  if (arena_reserve < req_size) return false;  // should be able to at least handle the current allocation size

  // commit eagerly?
  bool arena_commit = false;
  if (mi_option_get(mi_option_arena_eager_commit) == 2)      { arena_commit = _mi_os_has_overcommit(); }
  else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }

  return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0);
}


void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large,
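A worked example of the exponential scaling rule above (assuming the 64-bit default arena_reserve of 1GiB from src/options.c; `scaled_reserve` is a hypothetical helper that just restates the logic):

#include <stddef.h>

// arena_count  0..7   -> 1x  (1 GiB with the default reserve)
// arena_count  8..15  -> 2x  (2 GiB)
// arena_count 16..23  -> 4x  (4 GiB), doubling every 8 arenas up to 128
static size_t scaled_reserve(size_t arena_count, size_t arena_reserve) {
  if (arena_count >= 8 && arena_count <= 128) {
    arena_reserve = ((size_t)1 << (arena_count/8)) * arena_reserve;
  }
  return arena_reserve;
}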
@@ -381,9 +382,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, ...)
  // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
  if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
    void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
    if (p != NULL) return p;

    // otherwise, try to first eagerly reserve a new arena
    if (req_arena_id == _mi_arena_id_none()) {
      mi_arena_id_t arena_id = 0;
      if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
@@ -400,14 +401,14 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, ...)
    errno = ENOMEM;
    return NULL;
  }

  // finally, fall back to the OS
  if (align_offset > 0) {
    return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
  }
  else {
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
  }
}

void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
@@ -443,22 +444,22 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) {
  mi_assert_internal(arena->blocks_purge != NULL);
  mi_assert_internal(!arena->memid.is_pinned);
  const size_t size = mi_arena_block_size(blocks);
  void* const p = mi_arena_block_start(arena, bitmap_idx);
  bool needs_recommit;
  if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
    // all blocks are committed, we can purge freely
    needs_recommit = _mi_os_purge(p, size, stats);
  }
  else {
    // some blocks are not committed -- this can happen when a partially committed block is freed
    // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge;
    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
    // and also undo the decommit stats (as it was already adjusted)
    mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats);
    _mi_stat_increase(&stats->committed, size);
  }

  // clear the purged blocks
  _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx);
  // update committed bitmap
@@ -476,7 +477,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) {

  if (_mi_preloading() || delay == 0) {
    // decommit directly
    mi_arena_purge(arena, bitmap_idx, blocks, stats);
  }
  else {
    // schedule decommit
@@ -518,7 +519,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, ...)
}

// returns true if anything was purged
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
{
  if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
  mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
@@ -527,10 +528,10 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)

  // reset expire (if not already set concurrently)
  mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0);

  // potential purges scheduled, walk through the bitmap
  bool any_purged = false;
  bool full_purge = true;
  for (size_t i = 0; i < arena->field_count; i++) {
    size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]);
    if (purge != 0) {
@@ -581,7 +582,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) {

  // allow only one thread to purge at a time
  static mi_atomic_guard_t purge_guard;
  mi_atomic_guard(&purge_guard)
  {
    mi_msecs_t now = _mi_clock_now();
    size_t max_purge_count = (visit_all ? max_arena : 1);
@@ -594,7 +595,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) {
        }
      }
    }
  }
}
@@ -608,7 +609,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
  if (p==NULL) return;
  if (size==0) return;
  const bool all_committed = (committed_size == size);

  if (mi_memkind_is_os(memid.memkind)) {
    // was a direct OS allocation, pass through
    if (!all_committed && committed_size > 0) {
@@ -626,7 +627,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]);
    mi_assert_internal(arena != NULL);
    const size_t blocks = mi_block_count_of_size(size);

    // checks
    if (arena == NULL) {
      _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
@@ -648,7 +649,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
    else {
      mi_assert_internal(arena->blocks_committed != NULL);
      mi_assert_internal(arena->blocks_purge != NULL);

      if (!all_committed) {
        // mark the entire range as no longer committed (so we recommit the full range when re-using)
        _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
@@ -663,9 +664,9 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
      // works (as we should never reset decommitted parts).
    }
    // (delay) purge the entire range
    mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats);
  }

  // and make it available to others again
  bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
  if (!all_inuse) {
@@ -690,9 +691,9 @@ static void mi_arenas_unsafe_destroy(void) {
  for (size_t i = 0; i < max_arena; i++) {
    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
    if (arena != NULL) {
      if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
        mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
        _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
      }
      else {
        new_max_arena = i;
@@ -715,7 +716,7 @@ void _mi_arena_collect(bool force_purge, mi_stats_t* stats) {
// for dynamic libraries that are unloaded and need to release all their allocated memory.
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
  mi_arenas_unsafe_destroy();
  _mi_arena_collect(true /* force purge */, stats);  // purge non-owned arenas
}

// Is a pointer inside any of our arenas?
@@ -723,13 +724,97 @@ bool _mi_arena_contains(const void* p) {
  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
  for (size_t i = 0; i < max_arena; i++) {
    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
    if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
      return true;
    }
  }
  return false;
}

+/* -----------------------------------------------------------
+  Abandoned blocks/segments.
+  This is used to atomically abandon/reclaim segments
+  (and crosses the arena API but it is convenient to have here).
+  Abandoned segments still have live blocks; they get reclaimed
+  when a thread frees in it, or when a thread needs a fresh
+  segment; these threads scan the abandoned segments through
+  the arena bitmaps.
+----------------------------------------------------------- */
+
+// reclaim a specific abandoned segment; `true` on success.
+bool _mi_arena_segment_clear_abandoned(mi_memid_t memid)
+{
+  if (memid.memkind != MI_MEM_ARENA) return true;  // not in an arena, consider it un-abandoned
+  size_t arena_idx;
+  size_t bitmap_idx;
+  mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx);
+  mi_assert_internal(arena_idx < MI_MAX_ARENAS);
+  mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
+  mi_assert_internal(arena != NULL);
+  bool was_abandoned = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
+  // mi_assert_internal(was_abandoned);
+  mi_assert_internal(!was_abandoned || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
+  // mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
+  return was_abandoned;
+}
+
+// mark a specific segment as abandoned
+void _mi_arena_segment_mark_abandoned(mi_memid_t memid)
+{
+  if (memid.memkind != MI_MEM_ARENA) return;  // not in an arena
+  size_t arena_idx;
+  size_t bitmap_idx;
+  mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx);
+  mi_assert_internal(arena_idx < MI_MAX_ARENAS);
+  mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
+  mi_assert_internal(arena != NULL);
+  const bool was_unset = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
+  MI_UNUSED_RELEASE(was_unset);
+  mi_assert_internal(was_unset);
+  mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
+}
+
+// reclaim abandoned segments
+mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_id_t* previous_id, size_t* previous_idx)
+{
+  const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count);
+  int arena_idx = *previous_id;
+  size_t field_idx = mi_bitmap_index_field(*previous_idx);
+  size_t bit_idx = mi_bitmap_index_bit_in_field(*previous_idx) + 1;
+  // visit arenas (from the previous one)
+  for ( ; arena_idx < max_arena; arena_idx++, field_idx = 0, bit_idx = 0) {
+    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
+    if (arena != NULL) {
+      // visit the abandoned fields (starting at previous_idx)
+      for ( ; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
+        mi_bitmap_field_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
+        if mi_unlikely(field != 0) {  // skip zero fields quickly
+          // visit each set bit in the field (todo: maybe use `ctz` here?)
+          for ( ; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
+            // pre-check if the bit is set
+            mi_bitmap_field_t mask = ((mi_bitmap_field_t)1 << bit_idx);
+            if mi_unlikely((field & mask) == mask) {
+              mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
+              // try to reclaim it atomically
+              if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) {
+                *previous_idx = bitmap_idx;
+                *previous_id = arena_idx;
+                mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
+                // mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
+                return (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  // no more found
+  *previous_idx = 0;
+  *previous_id = 0;
+  return NULL;
+}
+

/* -----------------------------------------------------------
  Add an arena.
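A hedged usage sketch of the cursor-style scan API added above (illustrative; `reclaim_one` is a hypothetical caller-supplied function, and real callers must handle the same races the asserts document):

mi_arena_id_t current_id = 0;
size_t current_idx = 0;
mi_segment_t* segment;
while ((segment = _mi_arena_segment_clear_abandoned_next(&current_id, &current_idx)) != NULL) {
  // the abandoned bit was atomically cleared, so this thread now owns the claim;
  // either reclaim the segment into our heap or hand it back.
  if (!reclaim_one(segment)) {
    _mi_arena_segment_mark_abandoned(segment->memid);  // give it back for others to find
  }
}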
|
@ -763,13 +848,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
|
|||
|
||||
const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
|
||||
const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
|
||||
const size_t bitmaps = (memid.is_pinned ? 2 : 4);
|
||||
const size_t bitmaps = (memid.is_pinned ? 3 : 5);
|
||||
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
|
||||
mi_memid_t meta_memid;
|
||||
mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
|
||||
if (arena == NULL) return false;
|
||||
|
||||
// already zero'd due to os_alloc
|
||||
|
||||
// already zero'd due to zalloc
|
||||
// _mi_memzero(arena, asize);
|
||||
arena->id = _mi_arena_id_none();
|
||||
arena->memid = memid;
|
||||
|
@@ -783,14 +868,16 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, ...)
  arena->is_large = is_large;
  arena->purge_expire = 0;
  arena->search_idx = 0;
- arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
- arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
- arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
+ // consecutive bitmaps
+ arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
+ arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap
+ arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after abandoned bitmap
+ arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[4*fields]); // just after committed bitmap
  // initialize committed bitmap?
  if (arena->blocks_committed != NULL && arena->memid.initially_committed) {
    memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
  }

  // and claim leftover blocks if needed (so we never allocate there)
  ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
  mi_assert_internal(post >= 0);
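The `2 : 4` to `3 : 5` change above sizes the allocation for the new abandoned bitmap; all maps live consecutively after the in-place `blocks_inuse` array. A sketch of the resulting layout (assuming `fields` bitmap fields per map, as computed in this function):

// blocks_inuse      [0*fields .. 1*fields)   always present (in-place array)
// blocks_dirty      [1*fields .. 2*fields)   always present
// blocks_abandoned  [2*fields .. 3*fields)   always present (new in this commit)
// blocks_committed  [3*fields .. 4*fields)   only when not pinned
// blocks_purge      [4*fields .. 5*fields)   only when not pinned
// hence bitmaps = is_pinned ? 3 : 5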
src/options.c

@@ -81,7 +81,7 @@ static mi_option_desc_t options[_mi_option_last] =
  { 100, UNINIT, MI_OPTION(os_tag) },               // only apple specific for now but might serve more or less related purpose
  { 16,  UNINIT, MI_OPTION(max_errors) },           // maximum errors that are output
  { 16,  UNINIT, MI_OPTION(max_warnings) },         // maximum warnings that are output
- { 8,   UNINIT, MI_OPTION(max_segment_reclaim)},   // max. number of segment reclaims from the abandoned segments per try.
+ { 16,  UNINIT, MI_OPTION(max_segment_reclaim)},   // max. number of segment reclaims from the abandoned segments per try.
  { 0,   UNINIT, MI_OPTION(destroy_on_exit)},       // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
  #if (MI_INTPTR_SIZE>4)
  { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time
@@ -90,6 +90,7 @@ static mi_option_desc_t options[_mi_option_last] =
  #endif
  { 10,  UNINIT, MI_OPTION(arena_purge_mult) },            // purge delay multiplier for arenas
  { 1,   UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
+ { 1,   UNINIT, MI_OPTION(abandoned_reclaim_on_free) },   // reclaim an abandoned segment on a free
};

static void mi_option_init(mi_option_desc_t* desc);
357 src/segment.c
@@ -17,7 +17,7 @@ static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats);


// -------------------------------------------------------------------
// commit mask
// -------------------------------------------------------------------

static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) {
@@ -331,7 +331,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, ...)
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size)
{
  const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page);
  uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page->xblock_size, page_size);
  mi_assert_internal(page->xblock_size > 0 || _mi_ptr_page(p) == page);
  mi_assert_internal(_mi_ptr_segment(p) == segment);
  return p;
@@ -342,7 +342,7 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) {
  size_t page_size = _mi_os_page_size();
  size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size);
  size_t guardsize = 0;

  if (MI_SECURE>0) {
    // in secure mode, we set up a protected page in between the segment info
    // and the page data (and one at the end of the segment)
@@ -355,7 +355,7 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) {
  if (pre_size != NULL) *pre_size = isize;
  isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE);
  if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE;
  size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) );
  mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0);
  return (segment_size / MI_SEGMENT_SLICE_SIZE);
}
@@ -391,7 +391,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {

  // purge delayed decommits now? (no, leave it to the arena)
  // mi_segment_try_purge(segment,true,tld->stats);

  const size_t size = mi_segment_size(segment);
  const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
@@ -399,7 +399,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
  _mi_arena_free(segment, mi_segment_size(segment), csize, segment->memid, tld->stats);
}

// called by threads that are terminating
void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
  MI_UNUSED(tld);
  // nothing to do
@@ -451,7 +451,7 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, ...)

  size_t bitidx = start / MI_COMMIT_SIZE;
  mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS);

  size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0
  if (bitidx + bitcount > MI_COMMIT_MASK_BITS) {
    _mi_warning_message("commit mask overflow: idx=%zu count=%zu start=%zx end=%zx p=0x%p size=%zu fullsize=%zu\n", bitidx, bitcount, start, end, p, size, *full_size);
@@ -479,7 +479,7 @@ static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
    if (!_mi_os_commit(start, full_size, &is_zero, stats)) return false;
    mi_commit_mask_set(&segment->commit_mask, &mask);
  }

  // increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon.
  if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) {
    segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay);
@@ -498,7 +498,7 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
  return mi_segment_commit(segment, p, size, stats);
}

static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
  mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask));
  if (!segment->allow_purge) return true;
@@ -517,11 +517,11 @@ static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
    if (decommitted) {
      mi_commit_mask_t cmask;
      mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask);
      _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting
      mi_commit_mask_clear(&segment->commit_mask, &mask);
    }
  }

  // always clear any scheduled purges in our range
  mi_commit_mask_clear(&segment->purge_mask, &mask);
  return true;
@@ -537,16 +537,16 @@ static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
    // register for future purge in the purge mask
    uint8_t* start = NULL;
    size_t   full_size = 0;
    mi_commit_mask_t mask;
    mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask);
    if (mi_commit_mask_is_empty(&mask) || full_size==0) return;

    // update delayed commit
    mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask));
    mi_commit_mask_t cmask;
    mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask);  // only purge what is committed; span_free may try to decommit more
    mi_commit_mask_set(&segment->purge_mask, &cmask);
    mi_msecs_t now = _mi_clock_now();
    if (segment->purge_expire == 0) {
      // no previous purges, initialize now
      segment->purge_expire = now + mi_option_get(mi_option_purge_delay);
@@ -564,7 +564,7 @@ static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
      // previous purge mask is not yet expired, increase the expiration by a bit.
      segment->purge_expire += mi_option_get(mi_option_purge_extend_delay);
    }
  }
}

static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) {
@@ -602,7 +602,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) {
// note: can be called on abandoned segments
static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) {
  mi_assert_internal(slice_index < segment->slice_entries);
  mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment)
                         ? NULL : mi_span_queue_for(slice_count,tld));
  if (slice_count==0) slice_count = 1;
  mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries);
@@ -623,7 +623,7 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) {
  if (allow_purge) {
    mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats);
  }

  // and push it on the free page queue (if it was not a huge page)
  if (sq != NULL) mi_span_queue_push( sq, slice );
  else slice->xblock_size = 0; // mark huge page as free anyways
@@ -657,7 +657,7 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) {
    // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case)
    mi_assert_internal((segment->used==0 && slice->xblock_size==0) || segment->used == 1); // decreased right after this call in `mi_segment_page_clear`
    slice->xblock_size = 0; // mark as free anyways
    // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to
    // avoid a possible cache miss (and the segment is about to be freed)
    return slice;
  }
@@ -719,7 +719,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
  size_t extra = slice_count-1;
  if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET;
  if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than available entries in the segment->slices

  mi_slice_t* slice_next = slice + 1;
  for (size_t i = 1; i <= extra; i++, slice_next++) {
    slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i);
@@ -737,7 +737,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
    last->slice_count = 0;
    last->xblock_size = 1;
  }

  // and initialize the page
  page->is_committed = true;
  segment->used++;
@@ -796,7 +796,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) {
----------------------------------------------------------- */

static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id,
                                          size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices,
                                          bool commit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
@@ -804,7 +804,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, ...)
  bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy
  size_t align_offset = 0;
  size_t alignment = MI_SEGMENT_ALIGN;

  if (page_alignment > 0) {
    // mi_assert_internal(huge_page != NULL);
    mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN);
@@ -822,21 +822,21 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, ...)
    return NULL;  // failed to allocate
  }

  // ensure metadata part of the segment is committed
  mi_commit_mask_t commit_mask;
  if (memid.initially_committed) {
    mi_commit_mask_create_full(&commit_mask);
  }
  else {
    // at least commit the info slices
    const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
    mi_assert_internal(commit_needed>0);
    mi_commit_mask_create(0, commit_needed, &commit_mask);
    mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE);
    if (!_mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, NULL, tld->stats)) {
      _mi_arena_free(segment,segment_size,0,memid,tld->stats);
      return NULL;
    }
  }
  mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
@@ -847,8 +847,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, ...)
  segment->commit_mask = commit_mask;
  segment->purge_expire = 0;
  mi_commit_mask_create_empty(&segment->purge_mask);
- mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan

  mi_segments_track_size((long)(segment_size), tld);
  _mi_segment_map_allocated_at(segment);
  return segment;
@@ -859,32 +858,32 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, ...)
static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
{
  mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL));

  // calculate needed sizes first
  size_t info_slices;
  size_t pre_size;
  size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);

  // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little)
  const bool eager_delay = (// !_mi_os_has_overcommit() &&          // never delay on overcommit systems
                            _mi_current_thread_count() > 1 &&       // do not delay for the first N threads
                            tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
  const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
  bool commit = eager || (required > 0);

  // Allocate the segment from the OS
  mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id,
                                              &segment_slices, &pre_size, &info_slices, commit, tld, os_tld);
  if (segment == NULL) return NULL;

  // zero the segment info? -- not always needed as it may be zero initialized from the OS
  if (!segment->memid.initially_zero) {
    ptrdiff_t ofs = offsetof(mi_segment_t, next);
    size_t    prefix = offsetof(mi_segment_t, slices) - ofs;
    size_t    zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more
    _mi_memzero((uint8_t*)segment + ofs, zsize);
  }

  // initialize the rest of the segment info
  const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
  segment->segment_slices = segment_slices;
@@ -902,7 +901,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi_arena_id_t req_arena_id, ...)
  if (MI_SECURE>0) {
    // in secure mode, we set up a protected page in between the segment info
    // and the page data, and at the end of the segment.
    size_t os_pagesize = _mi_os_page_size();
    mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size);
    _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize);
    uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize;
@@ -914,10 +913,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi_arena_id_t req_arena_id, ...)

  // reserve first slices for segment info
  mi_page_t* page0 = mi_segment_span_allocate(segment, 0, info_slices, tld);
  mi_assert_internal(page0!=NULL); if (page0==NULL) return NULL; // cannot fail as we always commit in advance
  mi_assert_internal(segment->used == 1);
  segment->used = 0; // don't count our internal slices towards usage

  // initialize initial free pages
  if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page
    mi_assert_internal(huge_page==NULL);
@@ -928,7 +927,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi_arena_id_t req_arena_id, ...)
    mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask));
    mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask));
    *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld);
    mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance
  }

  mi_assert_expensive(mi_segment_is_valid(segment,tld));
@@ -982,7 +981,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) {
  mi_assert_internal(mi_page_all_free(page));
  mi_segment_t* segment = _mi_ptr_segment(page);
  mi_assert_internal(segment->used > 0);

  size_t inuse = page->capacity * mi_page_block_size(page);
  _mi_stat_decrease(&tld->stats->page_committed, inuse);
  _mi_stat_decrease(&tld->stats->pages, 1);
@@ -990,7 +989,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) {
  // reset the page memory to reduce memory pressure?
  if (segment->allow_decommit && mi_option_is_enabled(mi_option_deprecated_page_reset)) {
    size_t psize;
    uint8_t* start = _mi_page_start(segment, page, &psize);
    _mi_os_reset(start, psize, tld->stats);
  }
@@ -1001,7 +1000,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) {
  page->xblock_size = 1;

  // and free it
  mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld);
  segment->used--;
  // cannot assert segment valid as it is called during reclaim
  // mi_assert_expensive(mi_segment_is_valid(segment, tld));
@ -1036,172 +1035,20 @@ Abandonment
When threads terminate, they can leave segments with
live blocks (reachable through other threads). Such segments
are "abandoned" and will be reclaimed by other threads to
reuse their pages and/or free them eventually
reuse their pages and/or free them eventually. The
`thread_id` of such segments is 0.

We maintain a global list of abandoned segments that are
reclaimed on demand. Since this list is shared among threads,
the implementation needs to avoid the A-B-A problem when
popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem>
We use tagged pointers to avoid accidentally identifying
reused segments, much like stamped references in Java.
Secondly, we maintain a reader counter to avoid resetting
or decommitting segments that have a pending read operation.

Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the arenas/segment caches. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the arenas.
When a block is freed in an abandoned segment, the segment
is reclaimed into that thread.
----------------------------------------------------------- */
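The arena-based alternative described in the note above is the design this merge adopts: abandonment is recorded per arena and scanned with a cursor, while the tagged-pointer list below is the removed side of the diff. As a hedged sketch (the declarations match this commit, but the loop body is illustrative only), the new interface is used like this:

    mi_arena_id_t arena_id = 0;  // cursor: which arena is being scanned
    size_t idx = 0;              // cursor: position in that arena's abandoned bitmap
    mi_segment_t* seg;
    while ((seg = _mi_arena_segment_clear_abandoned_next(&arena_id, &idx)) != NULL) {
      // `seg` is atomically un-abandoned here and owned by the caller;
      // it can be reclaimed into a heap, or handed back with:
      _mi_arena_segment_mark_abandoned(seg->memid);
    }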
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// to put in a tag that increments on update to avoid the A-B-A problem.
#define MI_TAGGED_MASK MI_SEGMENT_MASK
typedef uintptr_t mi_tagged_segment_t;
// Maintain this count for debug purposes
static mi_decl_cache_align _Atomic(size_t) abandoned_count;

static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
}

static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
return ((uintptr_t)segment | tag);
}
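As a hedged illustration (not part of the diff) of how the tag defeats A-B-A, assume some `segA` at an MI_SEGMENT_SIZE-aligned address:

    mi_tagged_segment_t ts1 = mi_tagged_segment(segA, 0);    // pointer | tag 1
    mi_tagged_segment_t ts2 = mi_tagged_segment(segA, ts1);  // same pointer | tag 2
    mi_assert_internal(mi_tagged_segment_ptr(ts1) == mi_tagged_segment_ptr(ts2));
    mi_assert_internal(ts1 != ts2);  // a stale CAS expecting ts1 now fails, even
                                     // though the raw pointer bits reappeared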
// This is a list of visited abandoned pages that were full at the time.
// This list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL

// The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL

// Maintain these for debug purposes (these counts may be a bit off)
static mi_decl_cache_align _Atomic(size_t) abandoned_count;
static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count;

// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
static mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0

// Push on the visited list
static void mi_abandoned_visited_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited);
do {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext);
} while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment));
mi_atomic_increment_relaxed(&abandoned_visited_count);
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
{
// quick check if the visited list is empty
if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false;

// grab the whole visited list
mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL);
if (first == NULL) return false;

// first try to swap directly if the abandoned list happens to be NULL
mi_tagged_segment_t afirst;
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
if (mi_tagged_segment_ptr(ts)==NULL) {
size_t count = mi_atomic_load_relaxed(&abandoned_visited_count);
afirst = mi_tagged_segment(first, ts);
if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) {
mi_atomic_add_relaxed(&abandoned_count, count);
mi_atomic_sub_relaxed(&abandoned_visited_count, count);
return true;
}
}

// find the last element of the visited list: O(n)
mi_segment_t* last = first;
mi_segment_t* next;
while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) {
last = next;
}

// and atomically prepend to the abandoned list
// (no need to increase the readers as we don't access the abandoned segments)
mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned);
size_t count;
do {
count = mi_atomic_load_relaxed(&abandoned_visited_count);
mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
afirst = mi_tagged_segment(first, anext);
} while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst));
mi_atomic_add_relaxed(&abandoned_count, count);
mi_atomic_sub_relaxed(&abandoned_visited_count, count);
return true;
}
// Push on the abandoned list.
static void mi_abandoned_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_tagged_segment_t next;
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
do {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts));
next = mi_tagged_segment(segment, ts);
} while (!mi_atomic_cas_weak_release(&abandoned, &ts, next));
mi_atomic_increment_relaxed(&abandoned_count);
}
// Wait until there are no more pending reads on segments that used to be in the abandoned list
// called for example from `arena.c` before decommitting
// legacy: Wait until there are no more pending reads on segments that used to be in the abandoned list
void _mi_abandoned_await_readers(void) {
size_t n;
do {
n = mi_atomic_load_acquire(&abandoned_readers);
if (n != 0) mi_atomic_yield();
} while (n != 0);
}
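The reader counter pairs with the pop below; a hedged restatement of the protocol using the declarations above (illustrative only, not code from the diff):

    mi_atomic_increment_relaxed(&abandoned_readers);  // enter read section
    mi_tagged_segment_t ts = mi_atomic_load_acquire(&abandoned);
    mi_segment_t* seg = mi_tagged_segment_ptr(ts);
    if (seg != NULL) {
      // safe to dereference: decommit is held off while our count is nonzero
      mi_segment_t* next = mi_atomic_load_ptr_relaxed(mi_segment_t, &seg->abandoned_next);
      (void)next;
    }
    mi_atomic_decrement_relaxed(&abandoned_readers);  // leave read section
    // ...and a thread about to decommit such memory first calls:
    _mi_abandoned_await_readers();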
// Pop from the abandoned list
static mi_segment_t* mi_abandoned_pop(void) {
mi_segment_t* segment;
// Check efficiently if it is empty (or if the visited list needs to be moved)
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if mi_likely(segment == NULL) {
if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL
return NULL;
}
}

// Do a pop. We use a reader count to prevent
// a segment from being decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `region.c:_mi_mem_free` for example)
mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0;
ts = mi_atomic_load_acquire(&abandoned);
do {
segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) {
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next);
next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so it should not be decommitted
}
} while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next));
mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock
if (segment != NULL) {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
mi_atomic_decrement_relaxed(&abandoned_count);
}
return segment;
// nothing needed
}

/* -----------------------------------------------------------
@ -1211,10 +1058,9 @@ static mi_segment_t* mi_abandoned_pop(void) {
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
mi_assert_internal(segment->abandoned_visits == 0);
mi_assert_expensive(mi_segment_is_valid(segment,tld));

// remove the free pages from the free page queues
mi_slice_t* slice = &segment->slices[0];
const mi_slice_t* end = mi_segment_slices_end(segment);
@ -1229,15 +1075,14 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
}

// perform delayed decommits (forcing is much slower on mstress)
mi_segment_try_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats);

// all pages in the segment are abandoned; add it to the abandoned list
_mi_stat_increase(&tld->stats->segments_abandoned, 1);
mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
segment->thread_id = 0;
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned
mi_abandoned_push(segment);
_mi_arena_segment_mark_abandoned(segment->memid); mi_atomic_increment_relaxed(&abandoned_count);
}
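A hedged summary (not from the diff) of the state `mi_segment_abandon` leaves behind; any thread may later claim such a segment:

    // post-conditions of mi_segment_abandon:
    //   segment->thread_id == 0              -- no owning thread
    //   segment->abandoned_visits == 1       -- nonzero marks "abandoned"
    //   segment->used == segment->abandoned  -- every used page is abandoned
    //   the segment is findable again: via the abandoned list on the removed
    //   side, or via the arena bitmap set by _mi_arena_segment_mark_abandoned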

void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -1247,7 +1092,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
mi_segment_t* segment = _mi_page_segment(page);

mi_assert_expensive(mi_segment_is_valid(segment,tld));
segment->abandoned++;

_mi_stat_increase(&tld->stats->pages_abandoned, 1);
mi_assert_internal(segment->abandoned <= segment->used);
@ -1270,12 +1115,12 @@ static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice
}

// Possibly free pages and check if free space is available
static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(mi_segment_is_abandoned(segment));
bool has_page = false;

// for all slices
const mi_slice_t* end;
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
@ -1287,7 +1132,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
mi_page_t* const page = mi_slice_to_page(slice);
_mi_page_free_collect(page, false);
if (mi_page_all_free(page)) {
// if this page is all free now, free it without adding to any queues (yet)
mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
segment->abandoned--;
@ -1302,7 +1147,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
// a page has available free blocks of the right size
has_page = true;
}
}
}
}
else {
// empty span
@ -1318,7 +1163,6 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
// Reclaim an abandoned segment; returns NULL if the segment was freed
// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
@ -1327,7 +1171,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
mi_segments_track_size((long)mi_segment_size(segment), tld);
mi_assert_internal(segment->next == NULL);
_mi_stat_decrease(&tld->stats->segments_abandoned, 1);

// for all slices
const mi_slice_t* end;
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
@ -1378,10 +1222,23 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
}
}

// attempt to reclaim a particular segment (called from multi-threaded free `alloc.c:mi_free_block_mt`)
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned
if (_mi_arena_segment_clear_abandoned(segment->memid)) { // atomically unabandon
mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, NULL, &heap->tld->segments);
mi_assert_internal(res != NULL);
return (res != NULL);
}
return false;
}
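A hedged sketch of the intended caller: a cross-thread free can try this hook when the new `mi_option_abandoned_reclaim_on_free` option is on. The caller shape and both `mi_free_block_local`/`mi_free_block_delayed` helpers are assumptions for illustration, not code from this diff:

    static void mi_free_in_possibly_abandoned_segment(mi_heap_t* heap, mi_segment_t* segment, mi_block_t* block) {
      if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) &&
          mi_atomic_load_relaxed(&segment->thread_id) == 0 &&
          _mi_segment_attempt_reclaim(heap, segment)) {
        mi_free_block_local(segment, block);    // hypothetical: segment is now ours, free locally
      }
      else {
        mi_free_block_delayed(segment, block);  // hypothetical: usual cross-thread path
      }
    }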

void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
mi_segment_t* segment;
while ((segment = mi_abandoned_pop()) != NULL) {
mi_arena_id_t current_id = 0;
size_t current_idx = 0;
while ((segment = _mi_arena_segment_clear_abandoned_next(&current_id, &current_idx)) != NULL) {
mi_atomic_decrement_relaxed(&abandoned_count);
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
}
@ -1390,8 +1247,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
{
*reclaimed = false;
mi_segment_t* segment;
long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
mi_arena_id_t current_id = 0;
size_t current_idx = 0;
long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 1024); // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current_id, &current_idx)) != NULL))
{
mi_atomic_decrement_relaxed(&abandoned_count);
segment->abandoned_visits++;
// todo: an arena-exclusive heap will potentially visit many abandoned unsuitable segments
// and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way?
@ -1406,19 +1267,20 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else if (has_page && is_suitable) {
// found a large enough free span, or a page of the right block_size with free space
// we return the result of reclaim (which is usually `segment`) as it might free
// the segment due to concurrent frees (in which case `NULL` is returned).
return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
}
else if (segment->abandoned_visits > 3 && is_suitable) {
// always reclaim on 3rd visit to limit the abandoned queue length.
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else {
// otherwise, push on the visited list so it is not looked at again too quickly
mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit it again soon
mi_abandoned_visited_push(segment);
mi_segment_try_purge(segment, false /* true force? */, tld->stats); // force purge if needed as we may not visit it again soon
mi_atomic_increment_relaxed(&abandoned_count);
_mi_arena_segment_mark_abandoned(segment->memid);
}
}
return NULL;
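In summary, the visit policy above reads as follows (a hedged paraphrase; the `segment->used == 0` guard on the first branch is hunk context not shown here):

    //   segment->used == 0                   -> reclaim and free the whole segment
    //   has_page && is_suitable              -> reclaim into this heap and return it
    //   abandoned_visits > 3 && is_suitable  -> reclaim anyway, capping queue length
    //   otherwise                            -> purge lightly and mark abandoned again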
@ -1428,11 +1290,11 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
{
mi_segment_t* segment;
int max_tries = (force ? 16*1024 : 1024); // limit latency
if (force) {
mi_abandoned_visited_revisit();
}
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
mi_arena_id_t current_id = 0;
size_t current_idx = 0;
int max_tries = (force ? 16*1024 : 1024); // limit latency
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current_id,&current_idx)) != NULL)) {
mi_atomic_decrement_relaxed(&abandoned_count);
mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees)
if (segment->used == 0) {
// free the segment (by forced reclaim) to make it available to other threads.
@ -1441,10 +1303,11 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else {
// otherwise, purge if needed and push on the visited list
// note: forced purge can be expensive if many threads are destroyed/created as in mstress.
mi_segment_try_purge(segment, force, tld->stats);
mi_abandoned_visited_push(segment);
mi_atomic_increment_relaxed(&abandoned_count);
_mi_arena_segment_mark_abandoned(segment->memid);
}
}
}
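For context, a hedged usage sketch: `_mi_abandoned_collect` is typically driven from a heap collection (the exact caller shape is an assumption, not code from this diff):

    mi_heap_t* heap = mi_heap_get_default();
    _mi_abandoned_collect(heap, true /* force */, &heap->tld->segments);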
@ -1457,7 +1320,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX);

// 1. try to reclaim an abandoned segment
bool reclaimed;
mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld);
@ -1471,7 +1334,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_
return segment;
}
// 2. otherwise allocate a fresh segment
return mi_segment_alloc(0, 0, heap->arena_id, tld, os_tld, NULL);
}
@ -1492,7 +1355,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki
// no free page, allocate a new segment and try again
if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) {
// OOM or reclaimed a good page in the heap
return NULL;
}
else {
// otherwise try again
@ -1517,27 +1380,27 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment,
mi_segment_t* segment = mi_segment_alloc(size,page_alignment,req_arena_id,tld,os_tld,&page);
if (segment == NULL || page==NULL) return NULL;
mi_assert_internal(segment->used==1);
mi_assert_internal(mi_page_block_size(page) >= size);
#if MI_HUGE_PAGE_ABANDON
segment->thread_id = 0; // huge segments are immediately abandoned
#endif

// for huge pages we initialize the xblock_size as we may
// overallocate to accommodate large alignments.
size_t psize;
uint8_t* start = _mi_segment_page_start(segment, page, &psize);
page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : (uint32_t)psize);

// decommit the part of the prefix of a page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE)
if (page_alignment > 0 && segment->allow_decommit) {
uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment);
mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment));
mi_assert_internal(psize - (aligned_p - start) >= size);
uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list
ptrdiff_t decommit_size = aligned_p - decommit_start;
_mi_os_reset(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments
}

return page;
}
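A hedged worked example of the prefix decommit above (addresses are made up for illustration):

    // with page_alignment = 4 MiB and start = 0x7f0000001000:
    //   aligned_p      = _mi_align_up(start, 4 MiB)  = 0x7f0000400000
    //   decommit_start = start + sizeof(mi_block_t)
    //   decommit_size  = aligned_p - decommit_start  (just under 4 MiB)
    // i.e. nearly the whole unused prefix is returned to the OS while the first
    // block header (which carries the free-list link) stays committed.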
@ -1609,7 +1472,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld);
}
else {
page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld);
}
mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid));
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));