merge from dev-slice

Daan 2022-02-03 16:00:59 -08:00
commit 6ea598f1c4
22 changed files with 284 additions and 141 deletions

View file

@@ -183,6 +183,10 @@ static boolean_t intro_zone_locked(malloc_zone_t* zone) {
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wc99-extensions"
#endif
static malloc_introspection_t mi_introspect = {
.enumerator = &intro_enumerator,
.good_size = &intro_good_size,
@@ -213,7 +217,7 @@ static malloc_zone_t mi_malloc_zone = {
.batch_free = &zone_batch_free,
.introspect = &mi_introspect,
#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)
#if defined(MAC_OS_X_VERSION_10_7) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
#if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
.version = 10,
#else
.version = 9,
@@ -222,7 +226,7 @@ static malloc_zone_t mi_malloc_zone = {
.memalign = &zone_memalign,
.free_definite_size = &zone_free_definite_size,
.pressure_relief = &zone_pressure_relief,
#if defined(MAC_OS_X_VERSION_10_7) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
#if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
.claimed_address = &zone_claimed_address,
#endif
#else
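For context: the claimed_address callback (and zone version 10) only exist from the macOS 10.14 SDK onward, which is why the version bump and the extra member are gated on the same macro. A minimal sketch of the gate, assuming the standard AvailabilityMacros.h definitions:

#include <AvailabilityMacros.h>

// Select the zone version and its optional members together: the
// claimed_address callback requires malloc_zone_t version 10 (10.14 SDK).
#if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
  #define MI_ZONE_VERSION 10
#else
  #define MI_ZONE_VERSION 9
#endif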

View file

@@ -553,12 +553,12 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
// Free a block
void mi_free(void* p) mi_attr_noexcept
{
const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
if (mi_unlikely(segment == NULL)) return;
mi_threadid_t tid = _mi_thread_id();
mi_page_t* const page = _mi_segment_page_of(segment, p);
if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)(p);
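For context, the fast path this hunk touches can be sketched with simplified stand-in types (the real segment and page definitions live in mimalloc's internal headers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef uintptr_t mi_threadid_t;

// Simplified stand-ins for mimalloc's internal segment/page types.
typedef struct { _Atomic(mi_threadid_t) thread_id; } segment_sketch_t;
typedef struct { uint8_t full_aligned; } page_sketch_t;

// A free is "local" when the calling thread owns the segment and the page
// is neither full nor holds aligned blocks; only then can the block be
// pushed onto the thread-local free list without an atomic CAS.
static inline bool is_local_free(const segment_sketch_t* segment,
                                 const page_sketch_t* page,
                                 mi_threadid_t tid) {
  return tid == atomic_load_explicit(&segment->thread_id, memory_order_relaxed)
      && page->full_aligned == 0;
}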

View file

@@ -115,17 +115,20 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
_mi_deferred_free(heap, collect >= MI_FORCE);
const bool force = collect >= MI_FORCE;
_mi_deferred_free(heap, force);
// note: never reclaim on collect but leave it to threads that need storage to reclaim
if (
#ifdef NDEBUG
const bool force_main =
#ifdef NDEBUG
collect == MI_FORCE
#else
#else
collect >= MI_FORCE
#endif
&& _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim)
{
#endif
&& _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim;
if (force_main) {
// the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
// if all memory is freed by now, all segments should be freed.
_mi_abandoned_reclaim_all(heap, &heap->tld->segments);
@@ -141,19 +144,27 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
_mi_heap_delayed_free(heap);
// collect retired pages
_mi_heap_collect_retired(heap, collect >= MI_FORCE);
_mi_heap_collect_retired(heap, force);
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
// collect segment caches
if (collect >= MI_FORCE) {
// collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list)
// note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment
_mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments);
// collect segment local caches
if (force) {
_mi_segment_thread_collect(&heap->tld->segments);
}
// decommit in global segment caches
// note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment
_mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os);
// collect regions on program-exit (or shared library unload)
if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
//_mi_mem_collect(&heap->tld->os);
}
}
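For reference, a sketch of how the new boolean flags derive from the collect level, assuming the enum ordering MI_NORMAL < MI_FORCE < MI_ABANDON used by mi_collect_t in heap.c:

#include <stdbool.h>

// Assumed ordering, mirroring mi_collect_t in heap.c.
typedef enum { MI_NORMAL, MI_FORCE, MI_ABANDON } collect_sketch_t;

static void derive_flags(collect_sketch_t collect, bool* force, bool* force_decommit) {
  *force = (collect >= MI_FORCE);           // forced and abandonment collects
  // forced decommit is expensive when threads are created/destroyed often,
  // so the abandonment path (MI_ABANDON) deliberately does not force it:
  *force_decommit = (collect == MI_FORCE);
}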

View file

@@ -466,7 +466,9 @@ static void mi_process_load(void) {
MI_UNUSED(dummy);
#endif
os_preloading = false;
atexit(&mi_process_done);
#if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521)
atexit(&mi_process_done);
#endif
_mi_options_init();
mi_process_init();
//mi_stats_reset();
@@ -553,11 +555,13 @@ static void mi_process_done(void) {
FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208
#endif
#if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
// free all memory if possible on process exit. This is not needed for a stand-alone process
// but should be done if mimalloc is statically linked into another shared library which
// is repeatedly loaded/unloaded, see issue #281.
mi_collect(true /* force */ );
#ifndef MI_SKIP_COLLECT_ON_EXIT
#if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
// free all memory if possible on process exit. This is not needed for a stand-alone process
// but should be done if mimalloc is statically linked into another shared library which
// is repeatedly loaded/unloaded, see issue #281.
mi_collect(true /* force */ );
#endif
#endif
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
@@ -578,9 +582,14 @@ static void mi_process_done(void) {
if (reason==DLL_PROCESS_ATTACH) {
mi_process_load();
}
else if (reason==DLL_THREAD_DETACH) {
if (!mi_is_redirected()) mi_thread_done();
else if (reason==DLL_PROCESS_DETACH) {
mi_process_done();
}
else if (reason==DLL_THREAD_DETACH) {
if (!mi_is_redirected()) {
mi_thread_done();
}
}
return TRUE;
}
@@ -599,7 +608,7 @@ static void mi_process_done(void) {
__pragma(comment(linker, "/include:" "__mi_msvc_initu"))
#endif
#pragma data_seg(".CRT$XIU")
extern "C" _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init };
mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init };
#pragma data_seg()
#elif defined(__cplusplus)

View file

@@ -49,54 +49,50 @@ typedef struct mi_option_desc_s {
mi_init_t init; // is it initialized yet? (from the environment)
mi_option_t option; // for debugging: the option index should match the option
const char* name; // option name without `mimalloc_` prefix
const char* legacy_name; // potential legacy v1.x option name
} mi_option_desc_t;
#define MI_OPTION(opt) mi_option_##opt, #opt
#define MI_OPTION_DESC(opt) {0, UNINIT, MI_OPTION(opt) }
#define MI_OPTION(opt) mi_option_##opt, #opt, NULL
#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy
static mi_option_desc_t options[_mi_option_last] =
{
// stable options
#if MI_DEBUG || defined(MI_SHOW_ERRORS)
#if MI_DEBUG || defined(MI_SHOW_ERRORS)
{ 1, UNINIT, MI_OPTION(show_errors) },
#else
#else
{ 0, UNINIT, MI_OPTION(show_errors) },
#endif
#endif
{ 0, UNINIT, MI_OPTION(show_stats) },
{ 0, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense.
// Some of the following options are experimental and not all combinations are valid. Use with care.
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`)
#if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit?
{ 0, UNINIT, MI_OPTION(eager_region_commit) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
#else
{ 1, UNINIT, MI_OPTION(eager_region_commit) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // legacy; ignored now and reset always uses MADV_FREE/MADV_DONTNEED (issue #518)
#endif
{ 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) },
{ 0, UNINIT, MI_OPTION(deprecated_reset_decommits) },
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
{ -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
#if defined(__NetBSD__)
{ 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(deprecated_segment_reset) },
#if defined(__NetBSD__)
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
#elif defined(_WIN32)
#elif defined(_WIN32)
{ 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
#else
#else
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
#endif
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds)
{ 25, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit)
{ 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments
#endif
{ 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
{ 16, UNINIT, MI_OPTION(max_warnings) } // maximum warnings that are output
{ 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds)
{ 500, UNINIT, MI_OPTION(segment_decommit_delay) } // decommit delay in milli-seconds for freed segments
};
static void mi_option_init(mi_option_desc_t* desc);
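To make the new macro concrete: MI_OPTION_LEGACY supplies the legacy name as the descriptor's final field, while MI_OPTION leaves it NULL. A small stand-alone demo of the expansion, with stand-in types:

#include <stdio.h>

typedef enum { UNINIT, INITIALIZED } init_sketch_t;
typedef enum { mi_option_abandoned_page_decommit } option_sketch_t;

// Mirrors mi_option_desc_t from this hunk.
typedef struct {
  long            value;
  init_sketch_t   init;
  option_sketch_t option;
  const char*     name;        // option name without "mimalloc_" prefix
  const char*     legacy_name; // potential legacy v1.x option name (or NULL)
} desc_sketch_t;

#define MI_OPTION(opt)               mi_option_##opt, #opt, NULL
#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy

int main(void) {
  desc_sketch_t d = { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) };
  printf("%s (legacy: %s)\n", d.name, d.legacy_name);
  // prints: abandoned_page_decommit (legacy: abandoned_page_reset)
  return 0;
}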
@@ -597,11 +593,21 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
static void mi_option_init(mi_option_desc_t* desc) {
// Read option value from the environment
char s[64+1];
char buf[64+1];
mi_strlcpy(buf, "mimalloc_", sizeof(buf));
mi_strlcat(buf, desc->name, sizeof(buf));
char s[64+1];
if (mi_getenv(buf, s, sizeof(s))) {
bool found = mi_getenv(buf,s,sizeof(s));
if (!found && desc->legacy_name != NULL) {
mi_strlcpy(buf, "mimalloc_", sizeof(buf));
mi_strlcat(buf, desc->legacy_name, sizeof(buf));
found = mi_getenv(buf,s,sizeof(s));
if (found) {
_mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name );
}
}
if (found) {
size_t len = strlen(s);
if (len >= sizeof(buf)) len = sizeof(buf) - 1;
for (size_t i = 0; i < len; i++) {
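The net effect of the lookup above: the current name wins, and the legacy name is tried second with a deprecation warning. A stand-alone sketch using plain getenv in place of mimalloc's case-insensitive mi_getenv/mi_strlcpy helpers:

#include <stdio.h>
#include <stdlib.h>

static const char* lookup_env_option(const char* name, const char* legacy_name) {
  char buf[64+1];
  snprintf(buf, sizeof(buf), "mimalloc_%s", name);
  const char* s = getenv(buf);
  if (s == NULL && legacy_name != NULL) {
    snprintf(buf, sizeof(buf), "mimalloc_%s", legacy_name);
    s = getenv(buf);
    if (s != NULL) {
      fprintf(stderr, "environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n",
              legacy_name, name);
    }
  }
  return s;  // NULL if neither variable is set
}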

View file

@@ -53,7 +53,7 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
extern inline uint8_t _mi_bin(size_t size) {
static inline uint8_t mi_bin(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
uint8_t bin;
if (wsize <= 1) {
@@ -98,6 +98,10 @@ extern inline uint8_t _mi_bin(size_t size) {
Queue of pages with free blocks
----------------------------------------------------------- */
uint8_t _mi_bin(size_t size) {
return mi_bin(size);
}
size_t _mi_bin_size(uint8_t bin) {
return _mi_heap_empty.pages[bin].block_size;
}
@@ -105,7 +109,7 @@ size_t _mi_bin_size(uint8_t bin) {
// Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_MEDIUM_OBJ_SIZE_MAX) {
return _mi_bin_size(_mi_bin(size));
return _mi_bin_size(mi_bin(size));
}
else {
return _mi_align_up(size,_mi_os_page_size());
@@ -134,7 +138,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
#endif
static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
mi_page_queue_t* pq = &heap->pages[bin];
@@ -144,7 +148,7 @@ static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
}
static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
mi_assert_internal(bin <= MI_BIN_FULL);
mi_page_queue_t* pq = &heap->pages[bin];
mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size);
@@ -177,9 +181,9 @@ static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_que
}
else {
// find previous size; due to minimal alignment, up to 3 previous bins may need to be skipped
uint8_t bin = _mi_bin(size);
uint8_t bin = mi_bin(size);
const mi_page_queue_t* prev = pq - 1;
while( bin == _mi_bin(prev->block_size) && prev > &heap->pages[0]) {
while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) {
prev--;
}
start = 1 + _mi_wsize_from_size(prev->block_size);
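The pattern in this file: hot call sites use a static inline that the compiler can inline freely, while the exported _mi_bin wrapper keeps the symbol available elsewhere (the page.c hunk below still calls the static mi_bin directly, since page-queue.c is textually included by page.c). A sketch with a simplified stand-in body:

#include <stddef.h>
#include <stdint.h>

// Simplified stand-in body; the real mi_bin computes mimalloc's size
// classes. The point here is the linkage pattern, not the computation.
static inline uint8_t mi_bin(size_t size) {
  uint8_t bin = 0;
  size_t wsize = (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
  while (wsize > 1) { wsize >>= 1; bin++; }  // crude log2 stand-in
  return bin;
}

// Exported wrapper: external callers keep using _mi_bin, internal call
// sites switch to the inlinable static version.
uint8_t _mi_bin(size_t size) {
  return mi_bin(size);
}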

View file

@@ -587,14 +587,17 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
// calculate the extend count
const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size);
size_t extend = page->reserved - page->capacity;
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND;
mi_assert_internal(extend > 0);
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; }
mi_assert_internal(max_extend > 0);
if (extend > max_extend) {
// ensure we don't touch memory beyond the page to reduce page commit.
// the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
extend = (max_extend==0 ? 1 : max_extend);
}
extend = max_extend;
}
mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
mi_assert_internal(extend < (1UL<<16));
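As a worked example of the clamp above (the constants are assumptions matching mimalloc's usual MI_MAX_EXTEND_SIZE of 4 KiB and MI_MIN_EXTEND of 1):

#include <stddef.h>
#include <stdint.h>

#define MI_MAX_EXTEND_SIZE (4*1024)  // assumed default
#define MI_MIN_EXTEND      (1)       // assumed default

static size_t max_extend_for(size_t bsize) {
  size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE
                         ? MI_MIN_EXTEND
                         : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
  if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; }
  // e.g. bsize 64 -> 64 blocks per extend; bsize 8192 -> 1 block,
  // so large blocks never commit far beyond what is actually used.
  return max_extend;
}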
@@ -783,7 +786,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
// that frees the block can free the whole page and segment directly.
static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
size_t block_size = _mi_os_good_alloc_size(size);
mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE);
bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX);
mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size));
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size);

View file

@@ -239,7 +239,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
if (mi_atomic_load_acquire(&no_getrandom)==0) {
ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
if (ret >= 0) return (buf_len == (size_t)ret);
if (ret != ENOSYS) return false;
if (errno != ENOSYS) return false;
mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom
}
#endif
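The one-character fix above matters because syscall(2) returns -1 on failure and stores the error code in errno, so the old comparison of the return value against ENOSYS could never match. A Linux-only sketch of the corrected pattern:

#define _GNU_SOURCE
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK (0x01)
#endif

static bool getrandom_once(void* buf, size_t buf_len) {
  ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
  if (ret >= 0) return (buf_len == (size_t)ret);
  if (errno != ENOSYS) return false;  // genuine failure (e.g. EAGAIN)
  return false;                       // ENOSYS: fall back to /dev/urandom
}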

View file

@@ -115,24 +115,26 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo
#define MI_MAX_PURGE_PER_PUSH (4)
static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld)
{
MI_UNUSED(tld);
if (!mi_option_is_enabled(mi_option_allow_decommit)) return;
mi_msecs_t now = _mi_clock_now();
size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start
size_t purged = 0;
for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots
const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */);
size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ );
for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots
if (idx >= MI_CACHE_MAX) idx = 0; // wrap
mi_cache_slot_t* slot = &cache[idx];
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);
if (expire != 0 && now >= expire) { // racy read
if (expire != 0 && (force || now >= expire)) { // racy read
// seems expired, first claim it from available
purged++;
mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) {
// was available, we claimed it
expire = mi_atomic_loadi64_acquire(&slot->expire);
if (expire != 0 && now >= expire) { // safe read
if (expire != 0 && (force || now >= expire)) { // safe read
// still expired, decommit it
mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
@@ -144,11 +146,15 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
}
_mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop
}
if (purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push
if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push
}
}
}
void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) {
mi_segment_cache_purge(force, tld );
}
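The behavioural difference, distilled: a forced purge (via the new _mi_segment_cache_collect) visits every slot and decommits any slot with a pending expiration, while a regular purge probes a bounded window from a random start and only decommits slots that have actually expired. A sketch of the per-slot test:

#include <stdbool.h>
#include <stdint.h>

typedef int64_t msecs_sketch_t;

// Per-slot test as in the hunk above: expire == 0 means no decommit is
// pending for the slot; otherwise force overrides the expiration time.
static bool slot_should_decommit(msecs_sketch_t expire, msecs_sketch_t now, bool force) {
  return (expire != 0 && (force || now >= expire));
}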
mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
@@ -167,7 +173,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me
}
// purge expired entries
mi_segment_cache_purge(tld);
mi_segment_cache_purge(false /* force? */, tld);
// find an available slot
mi_bitmap_index_t bitidx;

View file

@@ -538,7 +538,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s
}
// increase expiration of reusing part of the delayed decommit
if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) {
segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay);
segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay);
}
// always undo delayed decommits
mi_commit_mask_clear(&segment->decommit_mask, &mask);
@@ -554,7 +554,7 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_
static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
if (!segment->allow_decommit) return;
if (mi_option_get(mi_option_reset_delay) == 0) {
if (mi_option_get(mi_option_decommit_delay) == 0) {
mi_segment_commitx(segment, false, p, size, stats);
}
else {
@@ -569,21 +569,20 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_
mi_commit_mask_t cmask;
mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more
mi_commit_mask_set(&segment->decommit_mask, &cmask);
segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay);
mi_msecs_t now = _mi_clock_now();
mi_msecs_t now = _mi_clock_now();
if (segment->decommit_expire == 0) {
// no previous decommits, initialize now
mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask));
segment->decommit_expire = now + mi_option_get(mi_option_reset_delay);
segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay);
}
else if (segment->decommit_expire <= now) {
// previous decommit mask already expired
// mi_segment_delayed_decommit(segment, true, stats);
segment->decommit_expire = now + (mi_option_get(mi_option_reset_delay) / 8); // wait a tiny bit longer in case there is a series of free's
segment->decommit_expire = now + (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's
}
else {
// previous decommit mask is not yet expired
// segment->decommit_expire += 2; // = now + mi_option_get(mi_option_reset_delay);
// previous decommit mask is not yet expired, increase the expiration by a bit.
segment->decommit_expire += (mi_option_get(mi_option_decommit_delay) / 8);
}
}
}
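To make the new expiration policy concrete (mi_option_decommit_delay defaults to 25 ms per the options table earlier in this commit), a sketch of the update rule:

#include <stdint.h>

typedef int64_t msecs_sketch_t;

// Expiry update as in the hunk above, with delay = decommit_delay.
static msecs_sketch_t next_expire(msecs_sketch_t expire, msecs_sketch_t now, msecs_sketch_t delay) {
  if (expire == 0)        return now + delay;        // first pending decommit
  else if (expire <= now) return now + delay/8;      // just expired: linger briefly for a burst of frees
  else                    return expire + delay/8;   // not yet expired: push out a little
}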
@@ -877,7 +876,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed
segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large);
if (segment->allow_decommit) {
segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay);
segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay);
segment->decommit_mask = decommit_mask;
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask));
#if MI_DEBUG>2
@@ -1050,7 +1049,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
Abandonment
When threads terminate, they can leave segments with
live blocks (reached through other threads). Such segments
live blocks (reachable through other threads). Such segments
are "abandoned" and will be reclaimed by other threads to
reuse their pages and/or free them eventually
@@ -1065,11 +1064,11 @@ or decommitting segments that have a pending read operation.
Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the regions. This would have the advantage of keeping
in the arenas/segment_cache's. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the regions.
would be spread among all other segments in the arenas.
----------------------------------------------------------- */
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
@@ -1245,7 +1244,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
}
// perform delayed decommits
mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset) /* force? */, tld->stats);
mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats);
// all pages in the segment are abandoned; add it to the abandoned list
_mi_stat_increase(&tld->stats->segments_abandoned, 1);
@@ -1431,7 +1430,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
}
else {
// otherwise, push on the visited list so it gets not looked at too quickly again
mi_segment_delayed_decommit(segment, true, tld->stats); // decommit if needed
mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again
mi_abandoned_visited_push(segment);
}
}
@@ -1439,6 +1438,30 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
}
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
{
mi_segment_t* segment;
int max_tries = (force ? 16*1024 : 1024); // limit latency
if (force) {
mi_abandoned_visited_revisit();
}
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees)
if (segment->used == 0) {
// free the segment (by forced reclaim) to make it available to other threads.
// note: we could in principle optimize this by skipping reclaim and directly
// freeing, but that would temporarily violate some invariants
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else {
// otherwise, decommit if needed and push on the visited list
// note: forced decommit can be expensive if many threads are destroyed/created as in mstress.
mi_segment_delayed_decommit(segment, force, tld->stats);
mi_abandoned_visited_push(segment);
}
}
}
/* -----------------------------------------------------------
Reclaim or allocate
----------------------------------------------------------- */