merge from dev-exp; better abandoned reclamation

daan 2020-01-27 22:12:23 -08:00
commit b50bec463d
25 changed files with 661 additions and 337 deletions

View file

@@ -21,6 +21,10 @@ set(mi_sources
   src/random.c
   src/os.c
   src/arena.c
+<<<<<<< HEAD
+=======
+  src/region.c
+>>>>>>> dev-exp
   src/segment.c
   src/page.c
   src/alloc.c
@@ -106,10 +110,9 @@ endif()
 # Compiler flags
 if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
-  list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas)
+  list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
   if(CMAKE_C_COMPILER_ID MATCHES "GNU")
     list(APPEND mi_cflags -Wno-invalid-memory-model)
-    list(APPEND mi_cflags -fvisibility=hidden)
   endif()
 endif()

View file

@@ -112,7 +112,7 @@
       <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <ExceptionHandling>false</ExceptionHandling>
+      <ExceptionHandling>Sync</ExceptionHandling>
      <CompileAs>Default</CompileAs>
      <SupportJustMyCode>false</SupportJustMyCode>
    </ClCompile>

View file

@@ -250,4 +250,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
+</Project>

View file

@@ -74,4 +74,4 @@
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
-</Project>
+</Project>

View file

@@ -111,7 +111,7 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>Level4</WarningLevel>
      <Optimization>Disabled</Optimization>
      <SDLCheck>true</SDLCheck>
      <ConformanceMode>true</ConformanceMode>
@@ -165,7 +165,7 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>Level4</WarningLevel>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <ConformanceMode>true</ConformanceMode>
@@ -244,4 +244,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
+</Project>

View file

@@ -80,4 +80,4 @@
       <Filter>Header Files</Filter>
     </ClInclude>
   </ItemGroup>
-</Project>
+</Project>

View file

@@ -90,7 +90,7 @@
       <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <ExceptionHandling>false</ExceptionHandling>
+      <ExceptionHandling>Sync</ExceptionHandling>
      <CompileAs>Default</CompileAs>
      <SupportJustMyCode>false</SupportJustMyCode>
    </ClCompile>
@@ -112,7 +112,7 @@
       <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
-      <ExceptionHandling>false</ExceptionHandling>
+      <ExceptionHandling>Sync</ExceptionHandling>
      <CompileAs>Default</CompileAs>
      <SupportJustMyCode>false</SupportJustMyCode>
    </ClCompile>

View file

@@ -75,4 +75,4 @@
       <UniqueIdentifier>{39cb7e38-69d0-43fb-8406-6a0f7cefc3b4}</UniqueIdentifier>
     </Filter>
   </ItemGroup>
-</Project>
+</Project>

View file

@@ -100,7 +100,7 @@
       <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
       <CompileAs>CompileAsCpp</CompileAs>
       <SupportJustMyCode>false</SupportJustMyCode>
-      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard>Default</LanguageStandard>
    </ClCompile>
    <Lib>
      <AdditionalLibraryDirectories>
@@ -119,7 +119,7 @@
       <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
       <CompileAs>CompileAsCpp</CompileAs>
       <SupportJustMyCode>false</SupportJustMyCode>
-      <LanguageStandard>stdcpp17</LanguageStandard>
+      <LanguageStandard>Default</LanguageStandard>
    </ClCompile>
    <PostBuildEvent>
      <Command>

View file

@@ -78,4 +78,4 @@
       <UniqueIdentifier>{852a14ae-6dde-4e95-8077-ca705e97e5af}</UniqueIdentifier>
     </Filter>
   </ItemGroup>
-</Project>
+</Project>

View file

@@ -20,13 +20,20 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_trace_message(...)
 #endif
 
+#define MI_CACHE_LINE  64
 #if defined(_MSC_VER)
-#pragma warning(disable:4127)  // constant conditional due to MI_SECURE paths
-#define mi_decl_noinline     __declspec(noinline)
-#elif defined(__GNUC__) || defined(__clang__)
-#define mi_decl_noinline     __attribute__((noinline))
+#pragma warning(disable:4127)  // suppress constant conditional warning (due to MI_SECURE paths)
+#define mi_decl_noinline     __declspec(noinline)
+#define mi_decl_thread       __declspec(thread)
+#define mi_decl_cache_align  __declspec(align(MI_CACHE_LINE))
+#elif (defined(__GNUC__) && (__GNUC__>=3))  // includes clang and icc
+#define mi_decl_noinline     __attribute__((noinline))
+#define mi_decl_thread       __thread
+#define mi_decl_cache_align  __attribute__((aligned(MI_CACHE_LINE)))
 #else
 #define mi_decl_noinline
+#define mi_decl_thread       __thread  // hope for the best :-)
+#define mi_decl_cache_align
 #endif
@@ -72,13 +79,15 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed,
 // "segment.c"
-mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
+mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
 bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
+void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
+void _mi_abandoned_await_readers(void);
 
 // "page.c"
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
@@ -421,30 +430,24 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t*
   return mi_tf_make(block, mi_tf_delayed(tf));
 }
 
 // are all blocks in a page freed?
+// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
 static inline bool mi_page_all_free(const mi_page_t* page) {
   mi_assert_internal(page != NULL);
   return (page->used == 0);
 }
 
-// are there immediately available blocks
+// are there any available blocks?
+static inline bool mi_page_has_any_available(const mi_page_t* page) {
+  mi_assert_internal(page != NULL && page->reserved > 0);
+  return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
+}
+
+// are there immediately available blocks, i.e. blocks available on the free list.
 static inline bool mi_page_immediate_available(const mi_page_t* page) {
   mi_assert_internal(page != NULL);
   return (page->free != NULL);
 }
-
-// are there free blocks in this page?
-static inline bool mi_page_has_free(mi_page_t* page) {
-  mi_assert_internal(page != NULL);
-  bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL));
-  mi_assert_internal(hasfree || page->used == page->capacity);
-  return hasfree;
-}
-
-// are all blocks in use?
-static inline bool mi_page_all_used(mi_page_t* page) {
-  mi_assert_internal(page != NULL);
-  return !mi_page_has_free(page);
-}
 
 // is more than 7/8th of a page in use?
 static inline bool mi_page_mostly_used(const mi_page_t* page) {
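Editorial sketch, not part of the commit: the new MI_CACHE_LINE / mi_decl_cache_align macros above are used to keep hot atomic globals on their own cache line and avoid false sharing. A minimal standalone version of the same pattern, with hypothetical names:

#include <stdatomic.h>
#include <stddef.h>

#define EX_CACHE_LINE 64
#if defined(_MSC_VER)
#define ex_cache_align __declspec(align(EX_CACHE_LINE))
#elif defined(__GNUC__)
#define ex_cache_align __attribute__((aligned(EX_CACHE_LINE)))
#else
#define ex_cache_align
#endif

// without the alignment these two counters could land on the same cache line,
// so threads that only update one of them would still contend with each other
static ex_cache_align _Atomic(size_t) ex_alloc_count;
static ex_cache_align _Atomic(size_t) ex_free_count;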

View file

@@ -275,6 +275,7 @@ typedef struct mi_segment_s {
   struct mi_segment_s* abandoned_next;
   size_t abandoned;        // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
+  size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
   size_t used;             // count of pages in use
   uintptr_t cookie;        // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
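Editorial sketch (hypothetical names and threshold, not the actual segment code): the new abandoned_visits field supports the "better abandoned reclamation" in this commit by letting a segment that keeps being skipped over in the abandoned list be reclaimed unconditionally after enough visits.

#include <stddef.h>

#define EX_MAX_ABANDONED_VISITS 3

typedef struct ex_segment_s {
  struct ex_segment_s* abandoned_next;
  size_t abandoned_visits;  // incremented each time a reclaiming thread inspects this segment
} ex_segment_t;

// returns 1 if the caller should take ownership of the segment even when it
// does not match its current size-class needs, just to drain the abandoned list
static int ex_should_force_reclaim(ex_segment_t* segment) {
  segment->abandoned_visits++;
  return (segment->abandoned_visits > EX_MAX_ABANDONED_VISITS);
}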

View file

@@ -38,14 +38,12 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_decl_allocator __declspec(restrict)
 #endif
 #define mi_cdecl __cdecl
-#define mi_decl_thread __declspec(thread)
 #define mi_attr_malloc
 #define mi_attr_alloc_size(s)
 #define mi_attr_alloc_size2(s1,s2)
 #define mi_attr_alloc_align(p)
-#elif defined(__GNUC__) || defined(__clang__)
+#elif defined(__GNUC__)  // includes clang and icc
 #define mi_cdecl  // leads to warnings... __attribute__((cdecl))
-#define mi_decl_thread __thread
 #define mi_decl_export __attribute__((visibility("default")))
 #define mi_decl_allocator
 #define mi_attr_malloc __attribute__((malloc))
@@ -64,7 +62,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 #else
 #define mi_cdecl
-#define mi_decl_thread __thread
 #define mi_decl_export
 #define mi_decl_allocator
 #define mi_attr_malloc

View file

@@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Fast allocation in a page: just pop from the free list.
 // Fall back to generic allocation only if the list is empty.
 extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
   mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
   mi_block_t* block = page->free;
   if (mi_unlikely(block == NULL)) {
@@ -291,7 +291,8 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
 }
 
-static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) {
+static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) {
+  mi_page_t* page = _mi_segment_page_of(segment, p);
   mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
   _mi_free_block(page, local, block);
 }
@@ -339,7 +340,7 @@ void mi_free(void* p) mi_attr_noexcept
   if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) {  // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
-    mi_block_t* block = (mi_block_t*)p;
+    mi_block_t* const block = (mi_block_t*)p;
     if (mi_unlikely(mi_check_is_double_free(page,block))) return;
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
@@ -350,7 +351,8 @@ void mi_free(void* p) mi_attr_noexcept
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
-    mi_free_generic(segment, page, tid == segment->thread_id, p);
+    // note: recalc page in generic to improve code generation
+    mi_free_generic(segment, tid == segment->thread_id, p);
   }
 }
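Editorial sketch of the free-path shape changed above (all names hypothetical, not mimalloc's real API): a thread-id compare selects the cheap thread-local push, and everything else (cross-thread free, full page, aligned block) funnels into one out-of-line generic routine that re-derives the page from the segment.

#include <stdbool.h>
#include <stdint.h>

typedef struct ex_block_s   { struct ex_block_s* next; } ex_block_t;
typedef struct ex_page_s    { ex_block_t* local_free; } ex_page_t;
typedef struct ex_segment_s { uintptr_t thread_id; ex_page_t page; } ex_segment_t;

extern uintptr_t ex_thread_id(void);
extern void ex_free_generic(ex_segment_t* segment, bool local, void* p);  // noinline slow path

static void ex_free(ex_segment_t* segment, void* p) {
  ex_page_t* page = &segment->page;
  if (ex_thread_id() == segment->thread_id) {  // fast path: freeing on the owning thread
    ex_block_t* block = (ex_block_t*)p;
    block->next = page->local_free;            // push on the thread-local free list
    page->local_free = block;
  }
  else {
    ex_free_generic(segment, false, p);        // cross-thread or unusual page: slow path
  }
}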

View file

@@ -77,8 +77,8 @@ typedef struct mi_arena_s {
 
 // The available arenas
-static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
-static _Atomic(uintptr_t) mi_arena_count; // = 0
+static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
+static mi_decl_cache_align _Atomic(uintptr_t) mi_arena_count; // = 0
 
 /* -----------------------------------------------------------
@@ -114,6 +114,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
   size_t idx = mi_atomic_read(&arena->search_idx);  // start from last search
   for (size_t visited = 0; visited < fcount; visited++, idx++) {
     if (idx >= fcount) idx = 0;  // wrap around
+    // try to atomically claim a range of bits
     if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) {
       mi_atomic_write(&arena->search_idx, idx);  // start search from here next time
       return true;
@@ -213,6 +214,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) {
     if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) {
       // claimed! test again
       if (slot->is_committed && !slot->is_large && now >= slot->expire) {
+        _mi_abandoned_await_readers();  // wait until safe to decommit
         _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats);
         slot->is_committed = false;
       }
@@ -251,6 +253,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit
     if (is_committed) {
       long delay = mi_option_get(mi_option_arena_reset_delay);
       if (delay == 0 && !is_large) {
+        _mi_abandoned_await_readers();  // wait until safe to decommit
         _mi_os_decommit(start, size, tld->stats);
         slot->is_committed = false;
       }
@@ -286,8 +289,8 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
     // always committed
     *commit = true;
   }
-  else if (commit) {
-    // ensure commit now
+  else if (*commit) {
+    // arena not committed as a whole, but commit requested: ensure commit now
     bool any_uncommitted;
     mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
     if (any_uncommitted) {
@@ -379,6 +382,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool
   if (memid == MI_MEMID_OS) {
     // was a direct OS allocation, pass through
     if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) {
+      _mi_abandoned_await_readers();  // wait until safe to free
       _mi_os_free_ex(p, size, is_committed, tld->stats);
    }
  }
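Editorial sketch of the idea behind the new _mi_abandoned_await_readers() calls above (this is an assumption about the mechanism, with hypothetical names, not the actual implementation): threads that walk the lock-free abandoned list hold a reader count while they may still dereference segment memory, and decommitting or freeing that memory waits until the count drains to zero.

#include <stdatomic.h>
#include <stdint.h>
#include <sched.h>

static _Atomic(uintptr_t) ex_abandoned_readers;  // threads currently reading the abandoned list

static void ex_reader_enter(void) { atomic_fetch_add(&ex_abandoned_readers, 1); }
static void ex_reader_leave(void) { atomic_fetch_sub(&ex_abandoned_readers, 1); }

static void ex_await_readers(void) {
  // spin until no thread can still hold a pointer obtained from the list
  while (atomic_load(&ex_abandoned_readers) != 0) {
    sched_yield();
  }
}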

View file

@@ -76,9 +76,9 @@ static bool mi_heap_is_valid(mi_heap_t* heap) {
 ----------------------------------------------------------- */
 
 typedef enum mi_collect_e {
-  NORMAL,
-  FORCE,
-  ABANDON
+  MI_NORMAL,
+  MI_FORCE,
+  MI_ABANDON
 } mi_collect_t;
@@ -87,12 +87,13 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
   UNUSED(heap);
   mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
   mi_collect_t collect = *((mi_collect_t*)arg_collect);
-  _mi_page_free_collect(page, collect >= ABANDON);
+  _mi_page_free_collect(page, collect >= MI_FORCE);
   if (mi_page_all_free(page)) {
-    // no more used blocks, free the page. TODO: should we retire here and be less aggressive?
-    _mi_page_free(page, pq, collect != NORMAL);
+    // no more used blocks, free the page.
+    // note: this will free retired pages as well.
+    _mi_page_free(page, pq, collect >= MI_FORCE);
   }
-  else if (collect == ABANDON) {
+  else if (collect == MI_ABANDON) {
     // still used blocks but the thread is done; abandon the page
     _mi_page_abandon(page, pq);
   }
@@ -111,61 +112,55 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
 static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
 {
   if (!mi_heap_is_initialized(heap)) return;
-  _mi_deferred_free(heap, collect > NORMAL);
+  _mi_deferred_free(heap, collect >= MI_FORCE);
 
-  // collect (some) abandoned pages
-  if (collect >= NORMAL && !heap->no_reclaim) {
-    if (collect == NORMAL) {
-      // this may free some segments (but also take ownership of abandoned pages)
-      _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments);
-    }
-    else if (
-      #ifdef NDEBUG
-      collect == FORCE
-      #else
-      collect >= FORCE
-      #endif
-      && _mi_is_main_thread() && mi_heap_is_backing(heap))
-    {
-      // the main thread is abandoned, try to free all abandoned segments.
-      // if all memory is freed by now, all segments should be freed.
-      _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments);
-    }
+  // note: never reclaim on collect but leave it to threads that need storage to reclaim
+  if (
+    #ifdef NDEBUG
+    collect == MI_FORCE
+    #else
+    collect >= MI_FORCE
+    #endif
+    && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim)
+  {
+    // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
+    // if all memory is freed by now, all segments should be freed.
+    _mi_abandoned_reclaim_all(heap, &heap->tld->segments);
   }
 
   // if abandoning, mark all pages to no longer add to delayed_free
-  if (collect == ABANDON) {
-    //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) {
-    //  _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
-    //}
+  if (collect == MI_ABANDON) {
     mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
   }
 
   // free thread delayed blocks.
-  // (if abandoning, after this there are no more local references into the pages.)
+  // (if abandoning, after this there are no more thread-delayed references into the pages.)
   _mi_heap_delayed_free(heap);
 
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
-  mi_assert_internal( collect != ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL );
+  mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL );
 
   // collect segment caches
-  if (collect >= FORCE) {
+  if (collect >= MI_FORCE) {
     _mi_segment_thread_collect(&heap->tld->segments);
   }
 
-  #ifndef NDEBUG
   // collect regions
-  if (collect >= FORCE && _mi_is_main_thread()) {
-    // _mi_mem_collect(&heap->tld->stats);
+  if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
+    //_mi_mem_collect(&heap->tld->os);
   }
-  #endif
 }
 
 void _mi_heap_collect_abandon(mi_heap_t* heap) {
-  mi_heap_collect_ex(heap, ABANDON);
+  mi_heap_collect_ex(heap, MI_ABANDON);
 }
 
 void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept {
-  mi_heap_collect_ex(heap, (force ? FORCE : NORMAL));
+  mi_heap_collect_ex(heap, (force ? MI_FORCE : MI_NORMAL));
 }
 
 void mi_collect(bool force) mi_attr_noexcept {
@@ -274,6 +269,9 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   page->used = 0;
 
   // and free the page
+  // mi_page_free(page,false);
+  page->next = NULL;
+  page->prev = NULL;
   _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);
 
   return true; // keep going
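Editorial sketch: the renamed collect levels above are declared in increasing strength so that comparisons like `collect >= MI_FORCE` mean "force and anything stronger". A minimal standalone version of that pattern (hypothetical names, not the heap code itself):

#include <stdio.h>

typedef enum ex_collect_e {
  EX_NORMAL,   // 0: regular collect
  EX_FORCE,    // 1: forced collect (mi_collect(true))
  EX_ABANDON   // 2: thread is terminating, abandon pages still in use
} ex_collect_t;

static void ex_collect(ex_collect_t collect) {
  if (collect >= EX_FORCE) {
    printf("freeing caches and retired pages\n");  // runs for FORCE and ABANDON
  }
  if (collect == EX_ABANDON) {
    printf("abandoning pages that still have used blocks\n");
  }
}

int main(void) {
  ex_collect(EX_ABANDON);
  return 0;
}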

View file

@@ -165,6 +165,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL };
   Initialization and freeing of the thread local heaps
 ----------------------------------------------------------- */
 
+// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
 typedef struct mi_thread_data_s {
   mi_heap_t heap;  // must come first due to cast in `_mi_heap_done`
   mi_tld_t  tld;
@@ -179,12 +180,13 @@ static bool _mi_heap_init(void) {
     mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
   }
   else {
     // use `_mi_os_alloc` to allocate directly from the OS
     mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation?
     if (td == NULL) {
       _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n");
       return false;
     }
+    // OS allocated so already zero initialized
     mi_tld_t* tld = &td->tld;
     mi_heap_t* heap = &td->heap;
     memcpy(tld, &tld_empty, sizeof(*tld));
@@ -227,6 +229,7 @@ static bool _mi_heap_done(mi_heap_t* heap) {
   // free if not the main thread
   if (heap != &_mi_heap_main) {
+    mi_assert_internal(heap->tld->segments.count == 0);
     _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
   }
 #if (MI_DEBUG > 0)

View file

@ -56,10 +56,10 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(verbose) }, { 0, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense. // the following options are experimental and not all combinations make sense.
{ 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand?
#if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit?
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(eager_region_commit) },
{ 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
#else #else
{ 1, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(eager_region_commit) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED

View file

@@ -396,7 +396,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 4TiB to 30TiB area to allocate them.
 #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
-static volatile _Atomic(uintptr_t) aligned_base;
+static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
 
 // Return a 4MiB aligned address that is probably available
 static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
@@ -904,7 +904,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 #if (MI_INTPTR_SIZE >= 8)
 // To ensure proper alignment, use our own area for huge OS pages
-static _Atomic(uintptr_t) mi_huge_start; // = 0
+static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0
 
 // Claim an aligned address range for huge pages
 static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {

View file

@@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta
 }
 
 static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
+static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld);
 
 #if (MI_DEBUG>=3)
 static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
@@ -127,12 +127,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
   mi_thread_free_t tfreex;
   mi_delayed_t old_delay;
   do {
-    tfree = mi_atomic_read(&page->xthread_free);
+    tfree = mi_atomic_read(&page->xthread_free);  // note: must acquire as we can break this loop and not do a CAS
     tfreex = mi_tf_set_delayed(tfree, delay);
     old_delay = mi_tf_delayed(tfree);
     if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) {
       mi_atomic_yield();  // delay until outstanding MI_DELAYED_FREEING are done.
-      tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
+      // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
     }
     else if (delay == old_delay) {
       break; // avoid atomic operation if already equal
@@ -140,7 +140,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
     else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
       break; // leave never-delayed flag set
     }
-  } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
+  } while ((old_delay == MI_DELAYED_FREEING) ||
+           !mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
 }
 
 /* -----------------------------------------------------------
@@ -235,8 +236,8 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
   mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
   mi_assert_internal(!page->is_reset);
+  // TODO: push on full queue immediately if it is full?
   mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
   mi_page_queue_push(heap, pq, page);
   mi_assert_expensive(_mi_page_is_valid(page));
 }
@@ -244,11 +245,14 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
 // allocate a fresh page from a segment
 static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
   mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq));
-  mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
-  if (page == NULL) return NULL;
+  mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os);
+  if (page == NULL) {
+    // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
+    return NULL;
+  }
   mi_assert_internal(pq==NULL || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
   mi_page_init(heap, page, block_size, heap->tld);
-  _mi_stat_increase( &heap->tld->stats.pages, 1);
+  _mi_stat_increase(&heap->tld->stats.pages, 1);
   if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
   mi_assert_expensive(_mi_page_is_valid(page));
   return page;
@@ -257,19 +261,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
 // Get a fresh page to use
 static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
   mi_assert_internal(mi_heap_contains_queue(heap, pq));
-
-  // try to reclaim an abandoned page first
-  mi_page_t* page = pq->first;
-  if (!heap->no_reclaim &&
-      _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments) &&
-      page != pq->first)
-  {
-    // we reclaimed, and we got lucky with a reclaimed page in our queue
-    page = pq->first;
-    if (page->free != NULL) return page;
-  }
-  // otherwise allocate the page
-  page = mi_page_fresh_alloc(heap, pq, pq->block_size);
+  mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size);
   if (page==NULL) return NULL;
   mi_assert_internal(pq->block_size==mi_page_block_size(page));
   mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
@@ -629,6 +621,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
 #endif
   page->is_zero = page->is_zero_init;
 
+  mi_assert_internal(page->is_committed);
+  mi_assert_internal(!page->is_reset);
   mi_assert_internal(page->capacity == 0);
   mi_assert_internal(page->free == NULL);
   mi_assert_internal(page->used == 0);
@@ -654,7 +648,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
 -------------------------------------------------------------*/
 
 // Find a page with free blocks of `page->block_size`.
-static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq)
+static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
 {
   // search through the pages in "next fit" order
   size_t count = 0;
@@ -692,13 +686,16 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   if (page == NULL) {
     _mi_heap_collect_retired(heap, false); // perhaps make a page available
     page = mi_page_fresh(heap, pq);
+    if (page == NULL && first_try) {
+      // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
+      page = mi_page_queue_find_free_ex(heap, pq, false);
+    }
   }
   else {
     mi_assert(pq->first == page);
     page->retire_expire = 0;
   }
   mi_assert_internal(page == NULL || mi_page_immediate_available(page));
 
   return page;
 }
@@ -722,7 +719,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
       return page; // fast path
     }
   }
-  return mi_page_queue_find_free_ex(heap, pq);
+  return mi_page_queue_find_free_ex(heap, pq, true);
 }
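Editorial sketch of the `first_try` retry introduced above (hypothetical names, not the page.c code itself): allocating a fresh page can now return NULL either on out-of-memory or because an abandoned page with free blocks was reclaimed into this heap's queue as a side effect, so the search is retried exactly once instead of giving up or recursing unboundedly.

typedef struct ex_page_s ex_page_t;
typedef struct ex_queue_s ex_queue_t;

extern ex_page_t* ex_queue_scan(ex_queue_t* pq);   // next-fit scan of the page queue
extern ex_page_t* ex_page_fresh(ex_queue_t* pq);   // may reclaim an abandoned page instead of allocating

static ex_page_t* ex_find_free(ex_queue_t* pq, int first_try) {
  ex_page_t* page = ex_queue_scan(pq);
  if (page == NULL) {
    page = ex_page_fresh(pq);
    if (page == NULL && first_try) {
      // out-of-memory _or_ a reclaimed page landed in pq; scan the queue once more
      page = ex_find_free(pq, 0);
    }
  }
  return page;
}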

View file

@@ -176,7 +176,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
   return true;
 }
 */
-#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \
+#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
       defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
       defined(__wasi__)
 #include <stdlib.h>
@@ -325,4 +325,4 @@ static void chacha_test(void)
   chacha_block(&r);
   mi_assert_internal(array_equals(r.output, r_out, 16));
 }
-*/
+*/

View file

@@ -57,7 +57,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
 // Constants
 #if (MI_INTPTR_SIZE==8)
-#define MI_HEAP_REGION_MAX_SIZE (256 * GiB)  // 48KiB for the region map
+#define MI_HEAP_REGION_MAX_SIZE (256 * GiB)  // 64KiB for the region map
 #elif (MI_INTPTR_SIZE==4)
 #define MI_HEAP_REGION_MAX_SIZE (3 * GiB)    // ~ KiB for the region map
 #else
@@ -72,14 +72,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
 #define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4)  // 64MiB
 #define MI_REGION_MAX_OBJ_SIZE   (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
 
-// Region info is a pointer to the memory region and two bits for
-// its flags: is_large, and is_committed.
+// Region info
 typedef union mi_region_info_u {
   uintptr_t value;
   struct {
-    bool valid;
-    bool is_large;
-    short numa_node;
+    bool  valid;      // initialized?
+    bool  is_large;   // allocated in fixed large/huge OS pages
+    short numa_node;  // the associated NUMA node (where -1 means no associated node)
   } x;
 } mi_region_info_t;
@@ -87,13 +86,14 @@ typedef union mi_region_info_u {
 // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
 // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
 typedef struct mem_region_s {
-  volatile _Atomic(uintptr_t) info;        // is_large, and associated numa node + 1 (so 0 is no association)
-  volatile _Atomic(void*) start;           // start of the memory area (and flags)
+  volatile _Atomic(uintptr_t) info;        // mi_region_info_t.value
+  volatile _Atomic(void*) start;           // start of the memory area
   mi_bitmap_field_t in_use;                // bit per in-use block
   mi_bitmap_field_t dirty;                 // track if non-zero per block
-  mi_bitmap_field_t commit;                // track if committed per block (if `!info.is_committed))
-  mi_bitmap_field_t reset;                 // track reset per block
-  volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena-
+  mi_bitmap_field_t commit;                // track if committed per block
+  mi_bitmap_field_t reset;                 // track if reset per block
+  volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
+  uintptr_t padding;                       // round to 8 fields
 } mem_region_t;
 
 // The region map
@@ -188,6 +188,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
   if (idx >= MI_REGION_MAX) {
     mi_atomic_decrement(&regions_count);
     _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
+    _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB));
     return false;
   }
@@ -239,11 +240,13 @@ static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large,
 {
   // try all regions for a free slot
   const size_t count = mi_atomic_read(&regions_count);
-  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
+  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0; // wrap around
     mem_region_t* r = &regions[idx];
+    // if this region suits our demand (numa node matches, large OS page matches)
     if (mi_region_is_suitable(r, numa_node, allow_large)) {
+      // then try to atomically claim a segment(s) in this region
       if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
         tld->region_idx = idx; // remember the last found position
         *region = r;
@@ -263,15 +266,15 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
   const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
   // try to claim in existing regions
   if (!mi_region_try_claim(numa_node, blocks, *is_large, &region, &bit_idx, tld)) {
-    // otherwise try to allocate a fresh region
+    // otherwise try to allocate a fresh region and claim in there
     if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
       // out of regions or memory
       return NULL;
     }
   }
 
-  // found a region and claimed `blocks` at `bit_idx`
+  // ------------------------------------------------
+  // found a region and claimed `blocks` at `bit_idx`, initialize them now
   mi_assert_internal(region != NULL);
   mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
@@ -346,25 +349,27 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
   size = _mi_align_up(size, _mi_os_page_size());
 
   // allocate from regions if possible
+  void* p = NULL;
   size_t arena_memid;
   const size_t blocks = mi_region_block_count(size);
   if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
-    void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
-    mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
-    if (p != NULL) {
-      #if (MI_DEBUG>=2)
-      if (*commit) { ((uint8_t*)p)[0] = 0; }
-      #endif
-      return p;
-    }
-    _mi_warning_message("unable to allocate from region: size %zu\n", size);
+    p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
+    if (p == NULL) {
+      _mi_warning_message("unable to allocate from region: size %zu\n", size);
+    }
+  }
+  if (p == NULL) {
+    // and otherwise fall back to the OS
+    p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
+    *memid = mi_memid_create_from_arena(arena_memid);
   }
 
-  // and otherwise fall back to the OS
-  void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
-  *memid = mi_memid_create_from_arena(arena_memid);
-  mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
-  if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; }
+  if (p != NULL) {
+    mi_assert_internal((uintptr_t)p % alignment == 0);
+    #if (MI_DEBUG>=2)
+    if (*commit) { ((uint8_t*)p)[0] = 0; }  // ensure the memory is committed
+    #endif
+  }
   return p;
 }
@@ -419,6 +424,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
     bool any_unreset;
     mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
     if (any_unreset) {
+      _mi_abandoned_await_readers();  // ensure no more pending write (in case reset = decommit)
       _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
     }
   }
@@ -451,7 +457,8 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
       memset(&regions[i], 0, sizeof(mem_region_t));
       // and release the whole region
       mi_atomic_write(&region->info, 0);
       if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
+        _mi_abandoned_await_readers();  // ensure no pending reads
         _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
       }
     }
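Editorial sketch mirroring the documented mi_region_info_t union above (hypothetical names): a region's metadata (valid flag, large-page flag, NUMA node) is packed into one uintptr_t so it can be published and read with a single atomic operation.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef union ex_region_info_u {
  uintptr_t value;      // the whole info word, stored atomically
  struct {
    bool  valid;        // initialized?
    bool  is_large;     // backed by large/huge OS pages
    short numa_node;    // associated NUMA node, or -1
  } x;
} ex_region_info_t;

static _Atomic(uintptr_t) ex_region_info_word;

static void ex_region_publish(bool is_large, short numa_node) {
  ex_region_info_t info;
  info.value = 0;                        // clear any padding bits first
  info.x.valid = true;
  info.x.is_large = is_large;
  info.x.numa_node = numa_node;
  atomic_store(&ex_region_info_word, info.value);  // one atomic publish
}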

View file

@@ -18,17 +18,16 @@ static void mi_segment_map_allocated_at(const mi_segment_t* segment);
 static void mi_segment_map_freed_at(const mi_segment_t* segment);
 static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats);
 
-/* -----------------------------------------------------------
+/* --------------------------------------------------------------------------------
   Segment allocation
 
-  In any case the memory for a segment is virtual and only
-  committed on demand (i.e. we are careful to not touch the memory
-  until we actually allocate a block there)
+  In any case the memory for a segment is virtual and usually committed on demand.
+  (i.e. we are careful to not touch the memory until we actually allocate a block there)
 
   If a thread ends, it "abandons" pages with used blocks
   and there is an abandoned segment list whose segments can
   be reclaimed by still running threads, much like work-stealing.
------------------------------------------------------------ */
+-------------------------------------------------------------------------------- */
 
 /* -----------------------------------------------------------
   Slices
@@ -119,6 +118,12 @@ static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) {
   Invariant checking
 ----------------------------------------------------------- */
 
+static bool mi_slice_is_used(const mi_slice_t* slice) {
+  return (slice->xblock_size > 0);
+}
+
 #if (MI_DEBUG>=3)
 static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) {
   for (mi_slice_t* s = sq->first; s != NULL; s = s->next) {
@@ -142,7 +147,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
     mi_assert_internal(slice->slice_offset == 0);
     size_t index = mi_slice_index(slice);
     size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? segment->slice_entries : index + slice->slice_count) - 1;
-    if (slice->xblock_size > 0) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets
+    if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets
       used_count++;
       for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) {
         mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t));
@@ -183,6 +188,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
 static size_t mi_segment_size(mi_segment_t* segment) {
   return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
 }
+
 static size_t mi_segment_info_size(mi_segment_t* segment) {
   return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE;
 }
@@ -196,6 +202,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
   uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE);
+
   /*
   if (idx == 0) {
     // the first page starts after the segment info (and possible guard page)
     p += segment->segment_info_size;
     psize -= segment->segment_info_size;
@@ -461,9 +468,16 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st
   mi_assert_internal(segment->decommit_mask == 0);
 }
 
+static bool mi_segment_is_abandoned(mi_segment_t* segment) {
+  return (segment->thread_id == 0);
+}
+
+// note: can be called on abandoned segments
 static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
   mi_assert_internal(slice_index < segment->slice_entries);
-  mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_span_queue_for(slice_count,tld));
+  mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment)
+                          ? NULL : mi_span_queue_for(slice_count,tld));
   if (slice_count==0) slice_count = 1;
   mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries);
@@ -487,6 +501,7 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size
   else slice->xblock_size = 0; // mark huge page as free anyways
 }
 
+/*
 // called from reclaim to add existing free spans
 static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) {
   mi_segment_t* segment = _mi_ptr_segment(slice);
@@ -494,6 +509,7 @@ static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld)
   size_t slice_index = mi_slice_index(slice);
   mi_segment_span_free(segment,slice_index,slice->slice_count,tld);
 }
+*/
 
 static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) {
   mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->xblock_size==0);
@@ -502,12 +518,11 @@ static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld
   mi_span_queue_delete(sq, slice);
 }
 
+// note: can be called on abandoned segments
 static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) {
-  mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0 && slice->xblock_size > 0);
+  mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0);
   mi_segment_t* segment = _mi_ptr_segment(slice);
-  mi_assert_internal(segment->used > 0);
-  segment->used--;
+  bool is_abandoned = mi_segment_is_abandoned(segment);
 
   // for huge pages, just mark as free but don't add to the queues
   if (segment->kind == MI_SEGMENT_HUGE) {
@@ -524,7 +539,7 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_
       // free next block -- remove it from free and merge
       mi_assert_internal(next->slice_count > 0 && next->slice_offset==0);
       slice_count += next->slice_count; // extend
-      mi_segment_span_remove_from_queue(next, tld);
+      if (!is_abandoned) { mi_segment_span_remove_from_queue(next, tld); }
     }
     if (slice > segment->slices) {
       mi_slice_t* prev = mi_slice_first(slice - 1);
@@ -533,14 +548,13 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_
       // free previous slice -- remove it from free and merge
       mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0);
       slice_count += prev->slice_count;
-      mi_segment_span_remove_from_queue(prev, tld);
+      if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); }
       slice = prev;
     }
   }
 
   // and add the new free page
   mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld);
-  mi_assert_expensive(mi_segment_is_valid(segment, tld));
   return slice;
 }
@@ -592,6 +606,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i
   // ensure the memory is committed
   mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats);
   page->is_reset = false;
+  page->is_committed = true;
   segment->used++;
   return page;
 }
@@ -626,24 +641,25 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm
 ----------------------------------------------------------- */
 
 // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
-static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
+static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
 {
+  mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL));
+  mi_assert_internal((segment==NULL) || (segment!=NULL && required==0));
   // calculate needed sizes first
   size_t info_slices;
   size_t pre_size;
-  size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);
-  size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
-  size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE;
+  const size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);
+  const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
+  const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE;
 
   // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little)
-  bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
-  bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
+  const bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
+  const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
   bool commit = eager || (required > 0);
 
-  // Try to get from our cache first
-  mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld);
   bool is_zero = false;
-  bool commit_info_still_good = (segment != NULL);
+  const bool commit_info_still_good = (segment != NULL);
   if (segment==NULL) {
     // Allocate the segment from the OS
     bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
@@ -660,8 +676,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
   }
   segment->memid = memid;
   segment->mem_is_fixed = mem_large;
-  segment->mem_is_committed = commit;
+  segment->mem_is_committed = mi_option_is_enabled(mi_option_eager_commit); // commit;
   mi_segments_track_size((long)(segment_size), tld);
   mi_segment_map_allocated_at(segment);
} }
@ -719,10 +734,17 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
*huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld);
} }
mi_assert_expensive(mi_segment_is_valid(segment,tld));
return segment; return segment;
} }
// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) {
return mi_segment_init(NULL, required, tld, os_tld, huge_page);
}
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
mi_assert_internal(segment != NULL); mi_assert_internal(segment != NULL);
mi_assert_internal(segment->next == NULL); mi_assert_internal(segment->next == NULL);
@ -756,31 +778,6 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
} }
} }
/* -----------------------------------------------------------
Page allocation
----------------------------------------------------------- */
static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE);
// find a free page
size_t page_size = _mi_align_up(required,(required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE));
size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE;
mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size);
mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld);
if (page==NULL) {
// no free page, allocate a new segment and try again
if (mi_segment_alloc(0, tld, os_tld, NULL) == NULL) return NULL; // OOM
return mi_segments_page_alloc(page_kind, required, tld, os_tld);
}
mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size);
mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id());
mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats);
return page;
}
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Page Free Page Free
@ -788,17 +785,18 @@ static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t requir
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);
// note: can be called on abandoned pages
static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(page->xblock_size > 0); mi_assert_internal(page->xblock_size > 0);
mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(mi_page_all_free(page));
mi_segment_t* segment = _mi_ptr_segment(page); mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment->used > 0);
size_t inuse = page->capacity * mi_page_block_size(page); size_t inuse = page->capacity * mi_page_block_size(page);
_mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->page_committed, inuse);
_mi_stat_decrease(&tld->stats->pages, 1); _mi_stat_decrease(&tld->stats->pages, 1);
// reset the page memory to reduce memory pressure? // reset the page memory to reduce memory pressure?
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
size_t psize; size_t psize;
uint8_t* start = _mi_page_start(segment, page, &psize); uint8_t* start = _mi_page_start(segment, page, &psize);
@ -813,7 +811,11 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
page->xblock_size = 1; page->xblock_size = 1;
// and free it // and free it
return mi_segment_span_free_coalesce(mi_page_to_slice(page), tld); mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld);
segment->used--;
// cannot assert segment valid as it is called during reclaim
// mi_assert_expensive(mi_segment_is_valid(segment, tld));
return slice;
} }
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
@ -825,6 +827,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
// mark it as free now // mark it as free now
mi_segment_page_clear(page, tld); mi_segment_page_clear(page, tld);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
if (segment->used == 0) { if (segment->used == 0) {
// no more used pages; remove from the free list and free the segment // no more used pages; remove from the free list and free the segment
@ -838,44 +841,175 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Abandonment Abandonment
When threads terminate, they can leave segments with
live blocks (reached through other threads). Such segments
are "abandoned" and will be reclaimed by other threads to
reuse their pages and/or free them eventually.
We maintain a global list of abandoned segments that are
reclaimed on demand. Since this is shared among threads
the implementation needs to avoid the A-B-A problem on
popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem>
We use tagged pointers to avoid accidentally identifying
reused segments, much like stamped references in Java.
Secondly, we maintain a reader counter to avoid resetting
or decommitting segments that have a pending read operation.
Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the regions. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the regions.
----------------------------------------------------------- */ ----------------------------------------------------------- */
// When threads terminate, they can leave segments with // Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// live blocks (reached through other threads). Such segments // to put in a tag that increments on update to avoid the A-B-A problem.
// are "abandoned" and will be reclaimed by other threads to #define MI_TAGGED_MASK MI_SEGMENT_MASK
// reuse their pages and/or free them eventually typedef uintptr_t mi_tagged_segment_t;
static volatile _Atomic(mi_segment_t*) abandoned; // = NULL;
static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments
// prepend a list of abandoned segments atomically to the global abandoned list; O(n) static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
static void mi_segments_prepend_abandoned(mi_segment_t* first) { return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
if (first == NULL) return; }
// first try if the abandoned list happens to be NULL static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return; mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
return ((uintptr_t)segment | tag);
}
// if not, find the end of the list // This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
// The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0
// Push on the visited list
static void mi_abandoned_visited_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_segment_t* anext;
do {
anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited);
segment->abandoned_next = anext;
} while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext));
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
{
// quick check if the visited list is empty
if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false;
// grab the whole visited list
mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL);
if (first == NULL) return false;
// first try to swap directly if the abandoned list happens to be NULL
const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
mi_tagged_segment_t afirst;
if (mi_tagged_segment_ptr(ts)==NULL) {
afirst = mi_tagged_segment(first, ts);
if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true;
}
// find the last element of the visited list: O(n)
mi_segment_t* last = first; mi_segment_t* last = first;
while (last->abandoned_next != NULL) { while (last->abandoned_next != NULL) {
last = last->abandoned_next; last = last->abandoned_next;
} }
// and atomically prepend // and atomically prepend to the abandoned list
mi_segment_t* next; // (no need to increase the readers as we don't access the abandoned segments)
mi_tagged_segment_t anext;
do { do {
next = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); anext = mi_atomic_read_relaxed(&abandoned);
last->abandoned_next = next; last->abandoned_next = mi_tagged_segment_ptr(anext);
} while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, next)); afirst = mi_tagged_segment(first, anext);
} while (!mi_atomic_cas_weak(&abandoned, afirst, anext));
return true;
} }
// Push on the abandoned list.
static void mi_abandoned_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_tagged_segment_t ts;
mi_tagged_segment_t next;
do {
ts = mi_atomic_read_relaxed(&abandoned);
segment->abandoned_next = mi_tagged_segment_ptr(ts);
next = mi_tagged_segment(segment, ts);
} while (!mi_atomic_cas_weak(&abandoned, next, ts));
}
// Wait until there are no more pending reads on segments that used to be in the abandoned list
void _mi_abandoned_await_readers(void) {
uintptr_t n;
do {
n = mi_atomic_read(&abandoned_readers);
if (n != 0) mi_atomic_yield();
} while (n != 0);
}
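/* -----------------------------------------------------------
  A minimal standalone sketch (illustration only) of the reader
  count protocol used above: readers bracket any access to memory
  that may be released, and a releaser waits until the count drops
  to zero before decommitting. All names below are hypothetical;
  the real code uses mimalloc's atomic wrappers and `abandoned_readers`.
----------------------------------------------------------- */
#include <stdatomic.h>
static _Atomic(unsigned long) example_readers;   // models `abandoned_readers`
static void example_read_section(void) {
  atomic_fetch_add(&example_readers, 1);
  // ... dereference fields of a segment that another thread may be about to release ...
  atomic_fetch_sub(&example_readers, 1);
}
static void example_await_readers(void) {        // models `_mi_abandoned_await_readers`
  while (atomic_load(&example_readers) != 0) {
    // spin; the real code yields with `mi_atomic_yield()`
  }
  // now it is safe to reset/decommit the memory the readers might have touched
}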
// Pop from the abandoned list
static mi_segment_t* mi_abandoned_pop(void) {
mi_segment_t* segment;
// Check efficiently if it is empty (or if the visited list needs to be moved)
mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if (mi_likely(segment == NULL)) {
if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL
return NULL;
}
}
// Do a pop. We use a reader count to prevent
// a segment from being decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `memory.c:_mi_mem_free` for example)
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0;
do {
ts = mi_atomic_read_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) {
next = mi_tagged_segment(segment->abandoned_next, ts); // note: this reads the segment's `abandoned_next` field, so the segment must not be decommitted while the read is pending (hence the reader count)
}
} while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts));
mi_atomic_decrement(&abandoned_readers); // release reader lock
if (segment != NULL) {
segment->abandoned_next = NULL;
}
return segment;
}
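/* -----------------------------------------------------------
  A minimal standalone sketch (illustration only) of the tagged
  pointer list used above: push, pop, and the bulk prepend done by
  the visited-list revisit, written with plain C11 atomics. The
  names (`node_t`, `NODE_ALIGN`, ...) are hypothetical; nodes are
  assumed to be NODE_ALIGN-aligned (e.g. from aligned_alloc) so the
  low bits are free for the tag, just as segments are aligned to
  MI_SEGMENT_SIZE above. The sketch does not model the reader
  count, so popped nodes must not be freed concurrently.
----------------------------------------------------------- */
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>
#define NODE_ALIGN ((uintptr_t)1 << 20)     // assumed node alignment
#define TAG_MASK   (NODE_ALIGN - 1)         // low bits hold the update counter
typedef struct node_s { struct node_s* next; } node_t;
typedef uintptr_t tagged_t;                 // node pointer | tag
static _Atomic(tagged_t) top;
static node_t* tagged_ptr(tagged_t ts) { return (node_t*)(ts & ~TAG_MASK); }
static tagged_t tagged_make(node_t* n, tagged_t prev) {
  // bump the tag on every update so a CAS fails even if the same node pointer returns (A-B-A)
  return (uintptr_t)n | ((prev + 1) & TAG_MASK);
}
static void push(node_t* n) {
  tagged_t ts, next;
  do {
    ts = atomic_load_explicit(&top, memory_order_relaxed);
    n->next = tagged_ptr(ts);
    next = tagged_make(n, ts);
  } while (!atomic_compare_exchange_weak(&top, &ts, next));
}
static node_t* pop(void) {
  tagged_t ts, next;
  node_t* n;
  do {
    ts = atomic_load_explicit(&top, memory_order_relaxed);
    n = tagged_ptr(ts);
    if (n == NULL) return NULL;
    next = tagged_make(n->next, ts);  // reads n->next: guarded by the reader count in the real code
  } while (!atomic_compare_exchange_weak(&top, &ts, next));
  n->next = NULL;
  return n;
}
// bulk prepend of a locally built list, as in mi_abandoned_visited_revisit
static void prepend_all(node_t* first) {
  if (first == NULL) return;
  node_t* last = first;
  while (last->next != NULL) last = last->next;   // find the end: O(n)
  tagged_t ts, next;
  do {
    ts = atomic_load_explicit(&top, memory_order_relaxed);
    last->next = tagged_ptr(ts);
    next = tagged_make(first, ts);
  } while (!atomic_compare_exchange_weak(&top, &ts, next));
}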
/* -----------------------------------------------------------
Abandon segment/page
----------------------------------------------------------- */
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0); mi_assert_internal(segment->used > 0);
mi_assert_internal(segment->abandoned_next == NULL); mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->abandoned_visits == 0);
mi_assert_expensive(mi_segment_is_valid(segment,tld)); mi_assert_expensive(mi_segment_is_valid(segment,tld));
// remove the free pages from our lists // remove the free pages from the free page queues
mi_slice_t* slice = &segment->slices[0]; mi_slice_t* slice = &segment->slices[0];
const mi_slice_t* end = mi_segment_slices_end(segment); const mi_slice_t* end = mi_segment_slices_end(segment);
while (slice < end) { while (slice < end) {
@ -896,8 +1030,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_segments_track_size(-((long)mi_segment_size(segment)), tld); mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
segment->thread_id = 0; segment->thread_id = 0;
segment->abandoned_next = NULL; segment->abandoned_next = NULL;
mi_segments_prepend_abandoned(segment); // prepend one-element list segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned
mi_atomic_increment(&abandoned_count); // keep approximate count mi_abandoned_push(segment);
} }
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -908,111 +1042,242 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_expensive(mi_segment_is_valid(segment,tld)); mi_assert_expensive(mi_segment_is_valid(segment,tld));
segment->abandoned++; segment->abandoned++;
_mi_stat_increase(&tld->stats->pages_abandoned, 1); _mi_stat_increase(&tld->stats->pages_abandoned, 1);
mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->abandoned <= segment->used);
if (segment->used == segment->abandoned) { if (segment->used == segment->abandoned) {
// all pages are abandoned, abandon the entire segment // all pages are abandoned, abandon the entire segment
mi_segment_abandon(segment,tld); mi_segment_abandon(segment, tld);
} }
} }
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { /* -----------------------------------------------------------
// To avoid the A-B-A problem, grab the entire list atomically Reclaim abandoned pages
mi_segment_t* segment = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); // pre-read to avoid expensive atomic operations ----------------------------------------------------------- */
if (segment == NULL) return false;
segment = mi_atomic_exchange_ptr(mi_segment_t, &abandoned, NULL);
if (segment == NULL) return false;
// we got a non-empty list static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) {
if (!try_all) { mi_slice_t* slice = &segment->slices[0];
// take at most 1/8th of the list and append the rest back to the abandoned list again *end = mi_segment_slices_end(segment);
// this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem) mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
// and probably ok since the length will tend to be not too large. slice = slice + slice->slice_count; // skip the first segment allocated page
uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated) return slice;
if (atmost < 8) atmost = 8; // but at least 8 }
// find the split point // Possibly free pages and check if free space is available
mi_segment_t* last = segment; static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld)
while (last->abandoned_next != NULL && atmost > 0) { {
last = last->abandoned_next; mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
atmost--; mi_assert_internal(mi_segment_is_abandoned(segment));
} bool has_page = false;
// split the list and push back the remaining segments
mi_segment_t* next = last->abandoned_next; // for all slices
last->abandoned_next = NULL; const mi_slice_t* end;
mi_segments_prepend_abandoned(next); mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
} while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
// reclaim all segments that we kept mi_assert_internal(slice->slice_offset == 0);
while(segment != NULL) { if (mi_slice_is_used(slice)) { // used page
mi_segment_t* const next = segment->abandoned_next; // save the next segment // ensure used count is up to date and collect potential concurrent frees
mi_page_t* const page = mi_slice_to_page(slice);
// got it. _mi_page_free_collect(page, false);
mi_atomic_decrement(&abandoned_count); if (mi_page_all_free(page)) {
mi_assert_expensive(mi_segment_is_valid(segment, tld)); // if this page is all free now, free it without adding to any queues (yet)
segment->abandoned_next = NULL;
segment->thread_id = _mi_thread_id();
mi_segments_track_size((long)mi_segment_size(segment),tld);
mi_assert_internal(segment->next == NULL);
_mi_stat_decrease(&tld->stats->segments_abandoned,1);
//mi_assert_internal(segment->decommit_mask == 0);
mi_slice_t* slice = &segment->slices[0];
const mi_slice_t* end = mi_segment_slices_end(segment);
mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
slice = slice + slice->slice_count; // skip the first segment allocated page
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
if (slice->xblock_size == 0) { // a free page, add it to our lists
mi_segment_span_add_free(slice,tld);
}
slice = slice + slice->slice_count;
}
slice = &segment->slices[0];
mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
slice = slice + slice->slice_count; // skip the first segment allocated page
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
mi_page_t* page = mi_slice_to_page(slice);
if (page->xblock_size > 0) { // a used page
mi_assert_internal(page->next == NULL && page->prev==NULL); mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
segment->abandoned--; segment->abandoned--;
// set the heap again and allow delayed free again slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
mi_page_set_heap(page, heap); mi_assert_internal(!mi_slice_is_used(slice));
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) if (slice->slice_count >= slices_needed) {
_mi_page_free_collect(page, false); // ensure used count is up to date has_page = true;
if (mi_page_all_free(page)) {
// if everything free by now, free the page
slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
}
else {
// otherwise reclaim it into the heap
_mi_page_reclaim(heap,page);
} }
} }
mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); else {
slice = slice + slice->slice_count; if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
// a page has available free blocks of the right size
has_page = true;
}
}
} }
else {
mi_assert(segment->abandoned == 0); // empty span
if (segment->used == 0) { // due to page_clear if (slice->slice_count >= slices_needed) {
mi_segment_free(segment,false,tld); has_page = true;
}
} }
slice = slice + slice->slice_count;
}
return has_page;
}
// go on // Reclaim an abandoned segment; returns NULL if the segment was freed
segment = next; // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
segment->thread_id = _mi_thread_id();
segment->abandoned_visits = 0;
mi_segments_track_size((long)mi_segment_size(segment), tld);
mi_assert_internal(segment->next == NULL);
_mi_stat_decrease(&tld->stats->segments_abandoned, 1);
// for all slices
const mi_slice_t* end;
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
if (mi_slice_is_used(slice)) {
// in use: reclaim the page in our heap
mi_page_t* page = mi_slice_to_page(slice);
mi_assert_internal(!page->is_reset);
mi_assert_internal(page->is_committed);
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
mi_assert_internal(mi_page_heap(page) == NULL);
mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
segment->abandoned--;
// set the heap again and allow delayed free again
mi_page_set_heap(page, heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
// if everything free by now, free the page
slice = mi_segment_page_clear(page, tld); // set slice again due to coalescing
}
else {
// otherwise reclaim it into the heap
_mi_page_reclaim(heap, page);
if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
}
}
}
else {
// the span is free, add it to our page queues
slice = mi_segment_span_free_coalesce(slice, tld); // set slice again due to coalescing
}
mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0);
slice = slice + slice->slice_count;
} }
return true; mi_assert(segment->abandoned == 0);
if (segment->used == 0) { // due to page_clear
mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed));
mi_segment_free(segment, false, tld);
return NULL;
}
else {
return segment;
}
} }
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
mi_segment_t* segment;
while ((segment = mi_abandoned_pop()) != NULL) {
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
}
static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld)
{
*reclaimed = false;
mi_segment_t* segment;
int max_tries = 8; // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
segment->abandoned_visits++;
bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees)
if (segment->used == 0) {
// free the segment (by forced reclaim) to make it available to other threads.
// note1: we prefer to free a segment as that might lead to reclaiming another
// segment that is still partially used.
// note2: we could in principle optimize this by skipping reclaim and directly
// freeing, but that would temporarily violate some invariants.
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else if (has_page) {
// found a large enough free span, or a page of the right block_size with free space
// we return the result of reclaim (which is usually `segment`) as it might free
// the segment due to concurrent frees (in which case `NULL` is returned).
return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
}
else if (segment->abandoned_visits > 3) {
// always reclaim after more than 3 visits to limit the abandoned list length.
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else {
// otherwise, push it on the visited list so it is not revisited too soon
mi_abandoned_visited_push(segment);
}
}
return NULL;
}
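/* -----------------------------------------------------------
  The per-segment decision policy of mi_segment_try_reclaim above,
  distilled into a tiny pure function (illustration only; the enum
  and function names are hypothetical, the thresholds mirror the
  code: at most 8 pops, forced reclaim after more than 3 visits).
----------------------------------------------------------- */
#include <stdbool.h>
#include <stddef.h>
typedef enum { RECLAIM_AND_FREE, RECLAIM_AND_USE, RECLAIM_FORCED, PUSH_VISITED } reclaim_action_t;
static reclaim_action_t reclaim_decision(size_t used, bool has_suitable_page, size_t abandoned_visits) {
  if (used == 0)            return RECLAIM_AND_FREE;  // reclaim, then free the whole segment
  if (has_suitable_page)    return RECLAIM_AND_USE;   // a large enough span or right-sized page is free
  if (abandoned_visits > 3) return RECLAIM_FORCED;    // bound the abandoned list length
  return PUSH_VISITED;                                // defer: revisit later via the visited list
}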
/* -----------------------------------------------------------
Reclaim or allocate
----------------------------------------------------------- */
static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX);
// 1. try to get a segment from our cache
mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld);
if (segment != NULL) {
mi_segment_init(segment, 0, tld, os_tld, NULL);
return segment;
}
// 2. try to reclaim an abandoned segment
bool reclaimed;
segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld);
if (reclaimed) {
// reclaimed a page of the right block size directly into the heap
mi_assert_internal(segment != NULL);
return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks
}
else if (segment != NULL) {
// reclaimed a segment with a large enough empty span in it
return segment;
}
// 3. otherwise allocate a fresh segment
return mi_segment_alloc(0, tld, os_tld, NULL);
}
/* -----------------------------------------------------------
Page allocation
----------------------------------------------------------- */
static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_kind, size_t required, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE);
// find a free page
size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE));
size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE;
mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size);
mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld);
if (page==NULL) {
// no free page, allocate a new segment and try again
if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) {
// OOM or reclaimed a good page in the heap
return NULL;
}
else {
// otherwise try again
return mi_segments_page_alloc(heap, page_kind, required, block_size, tld, os_tld);
}
}
mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size);
mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id());
mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats);
return page;
}
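/* -----------------------------------------------------------
  A worked example (illustration only) of the page size and slice
  computation above. The constants are assumptions for the sake of
  the example; the real values come from mimalloc-types.h.
----------------------------------------------------------- */
#include <stdio.h>
#include <stddef.h>
#define EX_SLICE_SIZE        ((size_t)64 * 1024)    // assumed MI_SEGMENT_SLICE_SIZE
#define EX_MEDIUM_PAGE_SIZE  ((size_t)512 * 1024)   // assumed MI_MEDIUM_PAGE_SIZE
static size_t ex_align_up(size_t n, size_t alignment) {
  return ((n + alignment - 1) / alignment) * alignment;
}
int main(void) {
  const size_t required[] = { 8*1024, 100*1024, 700*1024 };
  for (size_t i = 0; i < sizeof(required)/sizeof(required[0]); i++) {
    size_t align = (required[i] > EX_MEDIUM_PAGE_SIZE ? EX_MEDIUM_PAGE_SIZE : EX_SLICE_SIZE);
    size_t page_size = ex_align_up(required[i], align);
    printf("required=%zuKiB -> page_size=%zuKiB, slices_needed=%zu\n",
           required[i]/1024, page_size/1024, page_size / EX_SLICE_SIZE);
  }
  return 0;
}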
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Huge page allocation Huge page allocation
----------------------------------------------------------- */ ----------------------------------------------------------- */
@ -1031,16 +1296,16 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Page allocation and free Page allocation and free
----------------------------------------------------------- */ ----------------------------------------------------------- */
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page; mi_page_t* page;
if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld); page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld);
} }
else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld); page = mi_segments_page_alloc(heap,MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,block_size,tld, os_tld);
} }
else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld); page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld);
} }
else { else {
page = mi_segment_huge_page_alloc(block_size,tld,os_tld); page = mi_segment_huge_page_alloc(block_size,tld,os_tld);

View file

@ -18,6 +18,10 @@ terms of the MIT license. A copy of the license can be found in the file
#include "os.c" #include "os.c"
//#include "memory.c" //#include "memory.c"
#include "arena.c" #include "arena.c"
#include "region.c"
#include "segment.c" #include "segment.c"
#include "page.c" #include "page.c"
#include "heap.c" #include "heap.c"

View file

@ -32,16 +32,18 @@ static int ITER = 50; // N full iterations destructing and re-creating a
// static int THREADS = 8; // more repeatable if THREADS <= #processors // static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor // static int SCALE = 100; // scaling factor
#define STRESS // undefine for leak test
static bool allow_large_objects = true; // allow very large objects? static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
#ifdef USE_STD_MALLOC #ifdef USE_STD_MALLOC
#define custom_malloc(s) malloc(s) #define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s) #define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p) #define custom_free(p) free(p)
#else #else
#define custom_malloc(s) mi_malloc(s) #define custom_calloc(n,s) mi_calloc(n,s)
#define custom_realloc(p,s) mi_realloc(p,s) #define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p) #define custom_free(p) mi_free(p)
#endif #endif
@ -94,9 +96,12 @@ static void* alloc_items(size_t items, random_t r) {
} }
if (items == 40) items++; // pthreads uses that size for stack increases if (items == 40) items++; // pthreads uses that size for stack increases
if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t)); if (items==0) items = 1;
uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
if (p != NULL) { if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; for (uintptr_t i = 0; i < items; i++) {
p[i] = (items - i) ^ cookie;
}
} }
return p; return p;
} }
@ -118,7 +123,7 @@ static void free_items(void* p) {
static void stress(intptr_t tid) { static void stress(intptr_t tid) {
//bench_start_thread(); //bench_start_thread();
uintptr_t r = tid * 43; uintptr_t r = (tid * 43); // rand();
const size_t max_item_shift = 5; // 128 const size_t max_item_shift = 5; // 128
const size_t max_item_retained_shift = max_item_shift + 2; const size_t max_item_retained_shift = max_item_shift + 2;
size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
@ -126,7 +131,7 @@ static void stress(intptr_t tid) {
void** data = NULL; void** data = NULL;
size_t data_size = 0; size_t data_size = 0;
size_t data_top = 0; size_t data_top = 0;
void** retained = (void**)custom_malloc(retain * sizeof(void*)); void** retained = (void**)custom_calloc(retain,sizeof(void*));
size_t retain_top = 0; size_t retain_top = 0;
while (allocs > 0 || retain > 0) { while (allocs > 0 || retain > 0) {
@ -171,7 +176,46 @@ static void stress(intptr_t tid) {
//bench_end_thread(); //bench_end_thread();
} }
static void run_os_threads(size_t nthreads); static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
static void test_stress(void) {
uintptr_t r = rand();
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &stress);
for (int i = 0; i < TRANSFERS; i++) {
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
}
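/* -----------------------------------------------------------
  A minimal sketch (illustration only) of the transfer-slot pattern
  used by test_stress above: a thread publishes a pointer into a
  shared slot with an atomic exchange and frees whatever was there
  before, so roughly half the objects survive into the next round.
  Shown with plain C11 atomics; the test itself goes through its own
  atomic_exchange_ptr wrapper so it also builds as C++ and on MSVC.
----------------------------------------------------------- */
#include <stdatomic.h>
#include <stdlib.h>
static _Atomic(void*) example_slot;   // models one entry of `transfer[]`
static void example_publish(void* p) {
  void* old = atomic_exchange(&example_slot, p);  // hand `p` to whichever thread looks next
  free(old);                                      // free(NULL) is a no-op on the first call
}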
#ifndef STRESS
static void leak(intptr_t tid) {
uintptr_t r = rand();
void* p = alloc_items(1 /*pick(&r)%128*/, &r);
if (chance(50, &r)) {
intptr_t i = (pick(&r) % TRANSFERS);
void* q = atomic_exchange_ptr(&transfer[i], p);
free_items(q);
}
}
static void test_leak(void) {
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &leak);
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
}
#endif
int main(int argc, char** argv) { int main(int argc, char** argv) {
// > mimalloc-test-stress [THREADS] [SCALE] [ITER] // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
@ -197,21 +241,13 @@ int main(int argc, char** argv) {
//bench_start_program(); //bench_start_program();
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round. // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d);
mi_stats_reset(); mi_stats_reset();
uintptr_t r = 43 * 43; #ifdef STRESS
for (int n = 0; n < ITER; n++) { test_stress();
run_os_threads(THREADS); #else
for (int i = 0; i < TRANSFERS; i++) { test_leak();
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers #endif
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
mi_collect(true); mi_collect(true);
mi_stats_print(NULL); mi_stats_print(NULL);
@ -220,18 +256,21 @@ int main(int argc, char** argv) {
} }
static void (*thread_entry_fun)(intptr_t) = &stress;
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
static DWORD WINAPI thread_entry(LPVOID param) { static DWORD WINAPI thread_entry(LPVOID param) {
stress((intptr_t)param); thread_entry_fun((intptr_t)param);
return 0; return 0;
} }
static void run_os_threads(size_t nthreads) { static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD)); thread_entry_fun = fun;
HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE)); DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
for (uintptr_t i = 0; i < nthreads; i++) { for (uintptr_t i = 0; i < nthreads; i++) {
thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]);
} }
@ -246,7 +285,7 @@ static void run_os_threads(size_t nthreads) {
} }
static void* atomic_exchange_ptr(volatile void** p, void* newval) { static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#if (INTPTR_MAX == UINT32_MAX) #if (INTPTR_MAX == INT32_MAX)
return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
#else #else
return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
@ -257,12 +296,13 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#include <pthread.h> #include <pthread.h>
static void* thread_entry(void* param) { static void* thread_entry(void* param) {
stress((uintptr_t)param); thread_entry_fun((uintptr_t)param);
return NULL; return NULL;
} }
static void run_os_threads(size_t nthreads) { static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t)); thread_entry_fun = fun;
pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
memset(threads, 0, sizeof(pthread_t) * nthreads); memset(threads, 0, sizeof(pthread_t) * nthreads);
//pthread_setconcurrency(nthreads); //pthread_setconcurrency(nthreads);
for (uintptr_t i = 0; i < nthreads; i++) { for (uintptr_t i = 0; i < nthreads; i++) {
@ -277,12 +317,12 @@ static void run_os_threads(size_t nthreads) {
#ifdef __cplusplus #ifdef __cplusplus
#include <atomic> #include <atomic>
static void* atomic_exchange_ptr(volatile void** p, void* newval) { static void* atomic_exchange_ptr(volatile void** p, void* newval) {
return std::atomic_exchange_explicit((volatile std::atomic<void*>*)p, newval, std::memory_order_acquire); return std::atomic_exchange((volatile std::atomic<void*>*)p, newval);
} }
#else #else
#include <stdatomic.h> #include <stdatomic.h>
static void* atomic_exchange_ptr(volatile void** p, void* newval) { static void* atomic_exchange_ptr(volatile void** p, void* newval) {
return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acquire); return atomic_exchange((volatile _Atomic(void*)*)p, newval);
} }
#endif #endif