Mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-03 14:09:31 +03:00)

Merge branch 'dev-slice'

commit 2cbf68b5e7
47 changed files with 1977 additions and 1768 deletions
@@ -50,7 +50,7 @@ set(mi_sources
src/page.c
src/random.c
src/segment.c
src/segment-cache.c
src/segment-map.c
src/stats.c
src/prim/prim.c)

@@ -1,6 +1,6 @@
set(mi_version_major 2)
set(mi_version_minor 1)
set(mi_version_patch 1)
set(mi_version_patch 2)
set(mi_version ${mi_version_major}.${mi_version_minor})

set(PACKAGE_VERSION ${mi_version})
@@ -821,12 +821,12 @@ typedef enum mi_option_e {
mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
mi_option_segment_reset, ///< Experimental
mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default)
mi_option_reset_decommits, ///< Experimental
mi_option_purge_decommits, ///< Experimental

// v2.x specific options
mi_option_allow_decommit, ///< Enable decommitting memory (=on)
mi_option_decommit_delay, ///< Decommit page memory after N milli-seconds delay (25ms).
mi_option_segment_decommit_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms).
mi_option_allow_purge, ///< Enable decommitting memory (=on)
mi_option_purge_delay, ///< Decommit page memory after N milli-seconds delay (25ms).
mi_option_segment_purge_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms).

_mi_option_last
} mi_option_t;
@@ -249,7 +249,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>

@@ -91,5 +91,8 @@
<ClCompile Include="..\..\src\prim\prim.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

@@ -237,7 +237,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

@@ -50,9 +50,6 @@
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>

@@ -65,6 +62,9 @@
<ClCompile Include="..\..\src\prim\prim.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

@@ -249,7 +249,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>

@@ -52,6 +52,9 @@
<ClCompile Include="..\..\src\prim\prim.c" />
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

@@ -235,7 +235,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

@@ -55,6 +55,9 @@
<ClCompile Include="..\..\src\prim\windows\prim.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

@@ -257,7 +257,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>
ide/vs2022/mimalloc-override.vcxproj.filters (new file, 107 lines)
@@ -0,0 +1,107 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\src\alloc.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-aligned.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\heap.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\init.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\options.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\os.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page-queue.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\prim\windows\prim.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\prim\prim.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\include\mimalloc\atomic.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\src\bitmap.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\internal.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-etw.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-etw-gen.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-new-delete.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-override.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\track.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\types.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\prim.h">
<Filter>Headers</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Headers">
<UniqueIdentifier>{9ef1cf48-7bb2-4af1-8cc1-603486e08a7a}</UniqueIdentifier>
</Filter>
<Filter Include="Sources">
<UniqueIdentifier>{cfcf1674-81e3-487a-a8dd-5f956ae4007d}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="..\..\include\mimalloc-etw-gen.man">
<Filter>Headers</Filter>
</None>
</ItemGroup>
</Project>
@@ -141,7 +141,14 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\test\test-api-fill.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\test\test-api.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
</ClCompile>
</ItemGroup>
<ItemGroup>

@@ -235,7 +235,7 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-cache.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

@@ -243,7 +243,6 @@
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h" />
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-override.h" />
<ClInclude Include="..\..\include\mimalloc-etw.h" />
<ClInclude Include="..\..\include\mimalloc-new-delete.h" />
<ClInclude Include="..\..\include\mimalloc\atomic.h" />
<ClInclude Include="..\..\include\mimalloc\internal.h" />
ide/vs2022/mimalloc.vcxproj.filters (new file, 96 lines)
@@ -0,0 +1,96 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\src\alloc.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-aligned.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\heap.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\init.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\options.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\os.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page-queue.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\prim\windows\prim.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\prim\prim.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\bitmap.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\atomic.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\internal.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-new-delete.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-override.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\track.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\types.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\prim.h">
<Filter>Headers</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Headers">
<UniqueIdentifier>{dd2da697-c33c-4348-bf80-a802ebaa06fb}</UniqueIdentifier>
</Filter>
<Filter Include="Sources">
<UniqueIdentifier>{8027057b-4b93-4321-b93c-d51dd0c8077b}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H

#define MI_MALLOC_VERSION 211 // major + 2 digits minor
#define MI_MALLOC_VERSION 212 // major + 2 digits minor

// ------------------------------------------------------
// Compiler specific attributes

@@ -284,7 +284,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node,
mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;

#if MI_MALLOC_VERSION >= 200
#if MI_MALLOC_VERSION >= 182
// Create a heap that only allocates in the specified arena
mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
#endif
@@ -318,35 +318,40 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size
typedef enum mi_option_e {
// stable options
mi_option_show_errors,
mi_option_show_stats,
mi_option_verbose,
// some of the following options are experimental
// (deprecated options are kept for binary backward compatibility with v1.x versions)
mi_option_eager_commit,
mi_option_deprecated_eager_region_commit,
mi_option_deprecated_reset_decommits,
mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup
mi_option_show_errors, // print error messages
mi_option_show_stats, // print statistics on termination
mi_option_verbose, // print verbose messages
// the following options are experimental (see src/options.h)
mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1)
mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1)
mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup
mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup
mi_option_deprecated_segment_cache,
mi_option_page_reset,
mi_option_abandoned_page_decommit,
mi_option_deprecated_segment_reset,
mi_option_eager_commit_delay,
mi_option_decommit_delay,
mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes.
mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas)
mi_option_os_tag,
mi_option_max_errors,
mi_option_max_warnings,
mi_option_max_segment_reclaim,
mi_option_allow_decommit,
mi_option_segment_decommit_delay,
mi_option_decommit_extend_delay,
mi_option_destroy_on_exit,
_mi_option_last
mi_option_deprecated_page_reset,
mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination
mi_option_deprecated_segment_reset,
mi_option_eager_commit_delay,
mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all.
mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes.
mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
mi_option_os_tag, // tag used for OS logging (macOS only for now)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim,
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe.
mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit)
mi_option_arena_purge_mult,
mi_option_purge_extend_delay,
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
mi_option_eager_region_commit = mi_option_arena_eager_commit,
mi_option_reset_decommits = mi_option_purge_decommits,
mi_option_reset_delay = mi_option_purge_delay,
mi_option_abandoned_page_reset = mi_option_abandoned_page_purge
} mi_option_t;
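For illustration only (this example is not part of the commit): a minimal sketch of how an application might tune the renamed purge options through the public API shown above; the legacy aliases keep older option names compiling.

    #include <mimalloc.h>
    #include <stdio.h>

    int main(void) {
      // purge unused OS pages after 100 milli-seconds instead of the default delay
      mi_option_set(mi_option_purge_delay, 100);
      // let a purge decommit the memory rather than only resetting it
      mi_option_enable(mi_option_purge_decommits);
      // the legacy name is an alias, so this reads back the same value (100)
      printf("purge delay: %ld ms\n", mi_option_get(mi_option_reset_delay));
      return 0;
    }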
@@ -356,8 +361,9 @@ mi_decl_export void mi_option_disable(mi_option_t option);
mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable);
mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable);

mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option);
mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max);
mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option);
mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max);
mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option);
mi_decl_export void mi_option_set(mi_option_t option, long value);
mi_decl_export void mi_option_set_default(mi_option_t option, long value);
@@ -39,7 +39,11 @@ terms of the MIT license. A copy of the license can be found in the file
#include <stdatomic.h>
#define mi_atomic(name) atomic_##name
#define mi_memory_order(name) memory_order_##name
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L) // c17, see issue #735
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
#endif

// Various defines for all used memory orders in mimalloc
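A small sketch (not from the commit) of why the conditional matters: C17 deprecates `ATOMIC_VAR_INIT`, so with the guard above the same static initializer compiles cleanly on both older and newer compilers; `example_count` is a made-up variable name.

    #include <stdatomic.h>
    #include <stdint.h>

    // `= MI_ATOMIC_VAR_INIT(0)` expands to `= 0` under C17, and to
    // `= ATOMIC_VAR_INIT(0)` on older compilers that still require it
    static _Atomic(uintptr_t) example_count = MI_ATOMIC_VAR_INIT(0);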
@@ -113,11 +117,13 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
}

// Used by timers
#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))

#define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d)
#define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i)

#elif defined(_MSC_VER)
@@ -245,6 +251,21 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t
} while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
}

static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) {
mi_atomic_addi64_relaxed(p, i);
}

static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) {
int64_t read = _InterlockedCompareExchange64(p, des, *exp);
if (read == *exp) {
return true;
}
else {
*exp = read;
return false;
}
}

// The pointer macros cast to `uintptr_t`.
#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
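A hedged usage sketch (not part of the commit) of the CAS protocol implemented above: on failure `mi_atomic_casi64_strong_acq_rel` refreshes the expected value, so a retry loop needs no extra load. `mi_example_set_max64` is a hypothetical helper mirroring the `maxi64` update shown in this hunk.

    static void mi_example_set_max64(volatile _Atomic(int64_t)* p, int64_t x) {
      int64_t current = mi_atomic_loadi64_relaxed(p);
      while (current < x && !mi_atomic_casi64_strong_acq_rel(p, &current, x)) {
        // the failed CAS stored the observed value in `current`; retry
      }
    }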
@@ -281,9 +302,20 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t;
static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
if (mi_atomic_load_relaxed(once) != 0) return false; // quick test
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(once, &expected, 1); // try to set to 1
return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1
}

typedef _Atomic(uintptr_t) mi_atomic_guard_t;

// Allows only one thread to execute at a time
#define mi_atomic_guard(guard) \
uintptr_t _mi_guard_expected = 0; \
for(bool _mi_guard_once = true; \
_mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \
(mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) )

// Yield
#if defined(__cplusplus)
#include <thread>
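A usage sketch (not part of the commit): `mi_atomic_guard` expands to a one-iteration for-loop, so it can wrap a block directly; a thread that loses the CAS simply skips the block instead of blocking, and the winner releases the guard when the block exits. `purge_guard` and `example_purge_once` are hypothetical names.

    static mi_atomic_guard_t purge_guard;   // zero-initialized: guard starts free

    static void example_purge_once(void) {
      mi_atomic_guard(&purge_guard) {
        // at most one thread at a time executes this block; others fall through
      }
    }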
@@ -303,7 +335,7 @@ static inline void mi_atomic_yield(void) {
}
#elif (defined(__GNUC__) || defined(__clang__)) && \
(defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \
defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__)
#if defined(__x86_64__) || defined(__i386__)
static inline void mi_atomic_yield(void) {
__asm__ volatile ("pause" ::: "memory");
@@ -316,10 +348,16 @@ static inline void mi_atomic_yield(void) {
static inline void mi_atomic_yield(void) {
__asm__ volatile("yield" ::: "memory");
}
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)
#ifdef __APPLE__
static inline void mi_atomic_yield(void) {
__asm__ volatile ("or r27,r27,r27" ::: "memory");
}
#else
static inline void mi_atomic_yield(void) {
__asm__ __volatile__ ("or 27,27,27" ::: "memory");
}
#endif
#elif defined(__armel__) || defined(__ARMEL__)
static inline void mi_atomic_yield(void) {
__asm__ volatile ("nop" ::: "memory");
@@ -80,49 +80,52 @@ extern mi_decl_cache_align mi_stats_t _mi_stats_main;
extern mi_decl_cache_align const mi_page_t _mi_page_empty;
bool _mi_is_main_thread(void);
size_t _mi_current_thread_count(void);
bool _mi_preloading(void); // true while the C runtime is not ready
bool _mi_preloading(void); // true while the C runtime is not initialized yet
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
void _mi_thread_done(mi_heap_t* heap);
void _mi_thread_data_collect(void);

// os.c
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats);
void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats);

size_t _mi_os_page_size(void);
size_t _mi_os_good_alloc_size(size_t size);
bool _mi_os_has_overcommit(void);
bool _mi_os_has_virtual_reserve(void);

bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
bool _mi_os_protect(void* addr, size_t size);
bool _mi_os_unprotect(void* addr, size_t size);
bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats);

void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats);
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats);

void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
bool _mi_os_use_large_page(size_t size, size_t alignment);
size_t _mi_os_large_page_size(void);

void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid);

// arena.c
mi_arena_id_t _mi_arena_id_none(void);
void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id);
bool _mi_arena_is_os_allocated(size_t arena_memid);
void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats);
void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld);
bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id);
bool _mi_arena_contains(const void* p);
void _mi_arena_collect(bool force_purge, mi_stats_t* stats);
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);

// "segment-cache.c"
void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
void _mi_segment_cache_free_all(mi_os_tld_t* tld);
// "segment-map.c"
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);
@@ -170,8 +173,8 @@ uint8_t _mi_bin(size_t size); // for stats
void _mi_heap_destroy_pages(mi_heap_t* heap);
void _mi_heap_collect_abandon(mi_heap_t* heap);
void _mi_heap_set_default_direct(mi_heap_t* heap);
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid);
void _mi_heap_destroy_all(void);
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
void _mi_heap_unsafe_destroy_all(void);

// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
@@ -266,6 +269,10 @@ bool _mi_page_is_valid(mi_page_t* page);
#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)

#include <string.h>
// initialize a local variable to zero; use memset as compilers optimize constant sized memset's
#define _mi_memzero_var(x) memset(&x,0,sizeof(x))

// Is `x` a power of two? (0 is considered a power of two)
static inline bool _mi_is_power_of_two(uintptr_t x) {
return ((x & (x - 1)) == 0);
@@ -308,7 +315,7 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
}

// Is memory zero initialized?
static inline bool mi_mem_is_zero(void* p, size_t size) {
static inline bool mi_mem_is_zero(const void* p, size_t size) {
for (size_t i = 0; i < size; i++) {
if (((uint8_t*)p)[i] != 0) return false;
}
@@ -727,6 +734,29 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);

/* -----------------------------------------------------------
memory id's
----------------------------------------------------------- */

static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) {
mi_memid_t memid;
_mi_memzero_var(memid);
memid.memkind = memkind;
return memid;
}

static inline mi_memid_t _mi_memid_none(void) {
return _mi_memid_create(MI_MEM_NONE);
}

static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) {
mi_memid_t memid = _mi_memid_create(MI_MEM_OS);
memid.initially_committed = committed;
memid.initially_zero = is_zero;
memid.is_pinned = is_large;
return memid;
}

// -------------------------------------------------------------------
// Fast "random" shuffle
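A tiny sketch (not part of the commit) of how these helpers compose: an OS-backed block gets a memid that records its commit/zero/pinned state, and `mi_memkind_is_os` (defined in the types header later in this diff) can route it back to the OS free path.

    // committed, not known to be zeroed, not pinned to large OS pages
    mi_memid_t memid = _mi_memid_create_os(true, false, false);
    // memid.memkind == MI_MEM_OS, so mi_memkind_is_os(memid.memkind) holds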
@@ -887,7 +917,6 @@ static inline size_t mi_bsr(uintptr_t x) {

#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
#include <string.h>
extern bool _mi_cpu_has_fsrm;
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
if (_mi_cpu_has_fsrm) {
@@ -906,7 +935,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
}
}
#else
#include <string.h>
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
memcpy(dst, src, n);
}
@@ -915,7 +943,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
}
#endif

// -------------------------------------------------------------------------------
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// This is used for example in `mi_realloc`.
@@ -923,7 +950,6 @@ static inline void _mi_memzero(void* dst, size_t n) {

#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
// On GCC/CLang we provide a hint that the pointers are word aligned.
#include <string.h>
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
@@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Each OS/host needs to implement these primitives, see `src/prim`
// for implementations on Window, macOS, WASI, and Linux/Unix.
//
// note: on all primitive functions, we always get:
// note: on all primitive functions, we always have result parameters != NUL, and:
// addr != NULL and page aligned
// size > 0 and page aligned
// return value is an error code an int where 0 is success.
@@ -22,11 +22,12 @@ terms of the MIT license. A copy of the license can be found in the file
// OS memory configuration
typedef struct mi_os_mem_config_s {
size_t page_size; // 4KiB
size_t large_page_size; // 2MiB
size_t alloc_granularity; // smallest allocation size (on Windows 64KiB)
bool has_overcommit; // can we reserve more memory than can be actually committed?
bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc)
size_t page_size; // 4KiB
size_t large_page_size; // 2MiB
size_t alloc_granularity; // smallest allocation size (on Windows 64KiB)
bool has_overcommit; // can we reserve more memory than can be actually committed?
bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc)
bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory)
} mi_os_mem_config_t;

// Initialize
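For illustration only (values are examples, not part of the commit): the kind of configuration a Unix-style backend might report through this struct.

    mi_os_mem_config_t config = {
      .page_size           = 4 * 1024,          // 4 KiB pages
      .large_page_size     = 2 * 1024 * 1024,   // 2 MiB large pages
      .alloc_granularity   = 4 * 1024,          // mmap returns page-granular blocks
      .has_overcommit      = true,              // e.g. Linux with default overcommit
      .must_free_whole     = false,             // munmap can free partial ranges
      .has_virtual_reserve = true               // PROT_NONE reservations are possible
    };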
@@ -37,12 +38,23 @@ int _mi_prim_free(void* addr, size_t size );

// Allocate OS memory. Return NULL on error.
// The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
// If `commit` is false, the virtual memory range only needs to be reserved (with no access)
// which will later be committed explicitly using `_mi_prim_commit`.
// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
// pre: !commit => !allow_large
// try_alignment >= _mi_os_page_size() and a power of 2
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr);
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr);

// Commit memory. Returns error code or 0 on success.
int _mi_prim_commit(void* addr, size_t size, bool commit);
// For example, on Linux this would make the memory PROT_READ|PROT_WRITE.
// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows)
int _mi_prim_commit(void* addr, size_t size, bool* is_zero);

// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true
// if the memory would need to be re-committed. For example, on Windows this is always true,
// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit.
// pre: needs_recommit != NULL
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit);

// Reset memory. The range keeps being accessible but the content might be reset.
// Returns error code or 0 on success.
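A hedged sketch (not the shipped implementation) of a Linux-flavored `_mi_prim_decommit`, following the comment above: `MADV_DONTNEED` keeps the range accessible, so the caller does not need an explicit recommit afterwards.

    #include <sys/mman.h>
    #include <errno.h>
    #include <stdbool.h>
    #include <stddef.h>

    int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
      *needs_recommit = false;   // touching the pages again simply refaults zero pages
      return (madvise(addr, size, MADV_DONTNEED) == 0 ? 0 : errno);
    }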
@@ -52,10 +64,10 @@ int _mi_prim_reset(void* addr, size_t size);
int _mi_prim_protect(void* addr, size_t size, bool protect);

// Allocate huge (1GiB) pages possibly associated with a NUMA node.
// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
// pre: size > 0 and a multiple of 1GiB.
// addr is either NULL or an address hint.
// numa_node is either negative (don't care), or a numa node number.
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr);
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr);

// Return the current NUMA node
size_t _mi_prim_numa_node(void);
@@ -79,7 +79,7 @@ defined, undefined, or not accessible at all:
// windows event tracing

#define MI_TRACK_ENABLED 1
#define MI_TRACK_HEAP_DESTROY 0
#define MI_TRACK_HEAP_DESTROY 1
#define MI_TRACK_TOOL "ETW"

#define WIN32_LEAN_AND_MEAN
@@ -172,7 +172,7 @@ typedef int32_t mi_ssize_t;
// Derived constants
#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_SEGMENT_MASK (MI_SEGMENT_ALIGN - 1)
#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1))
#define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT)
#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024
@@ -291,16 +291,15 @@ typedef uintptr_t mi_thread_free_t;
typedef struct mi_page_s {
// "owned" by the segment
uint32_t slice_count; // slices in this page (0 if not a page)
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_reset : 1; // `true` if the page memory was reset
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
uint8_t is_zero_init : 1; // `true` if the page was zero initialized
uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized

// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
uint8_t free_is_zero : 1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire : 7; // expiration count for retired blocks

mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
@@ -326,6 +325,10 @@ typedef struct mi_page_s {

// ------------------------------------------------------
// Mimalloc segments contain mimalloc pages
// ------------------------------------------------------

typedef enum mi_page_kind_e {
MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment
@@ -350,7 +353,7 @@ typedef enum mi_segment_kind_e {
// is still tracked in fine-grained MI_COMMIT_SIZE chunks)
// ------------------------------------------------------

#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB
#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE)
#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB
#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS
@@ -368,20 +371,57 @@ typedef mi_page_t mi_slice_t;
typedef int64_t mi_msecs_t;

// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge os pages
MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case)
} mi_memkind_t;

static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}

typedef struct mi_memid_os_info {
void* base; // actual base address of the block (used for offset aligned allocations)
size_t alignment; // alignment at allocation
} mi_memid_os_info_t;

typedef struct mi_memid_arena_info {
size_t block_index; // index in the arena
mi_arena_id_t id; // arena id (>= 1)
bool is_exclusive; // the arena can only be used for specific arena allocations
} mi_memid_arena_info_t;

typedef struct mi_memid_s {
union {
mi_memid_os_info_t os; // only used for MI_MEM_OS
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
} mem;
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
mi_memkind_t memkind;
} mi_memid_t;

// Segments are large allocated memory blocks (8mb on 64 bit) from
// the OS. Inside segments we allocated fixed size _pages_ that
// contain blocks.
typedef struct mi_segment_s {
size_t memid; // memory id for arena allocation
bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
bool mem_is_large; // in large/huge os pages?
bool mem_is_committed; // `true` if the whole segment is eagerly committed
size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX)
size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX)
// constant fields
mi_memid_t memid; // memory id for arena allocation
bool allow_decommit;
bool allow_purge;
size_t segment_size;

bool allow_decommit;
mi_msecs_t decommit_expire;
mi_commit_mask_t decommit_mask;
// segment fields
mi_msecs_t purge_expire;
mi_commit_mask_t purge_mask;
mi_commit_mask_t commit_mask;

_Atomic(struct mi_segment_s*) abandoned_next;
@@ -540,6 +580,7 @@ typedef struct mi_stats_s {
mi_stat_count_t reserved;
mi_stat_count_t committed;
mi_stat_count_t reset;
mi_stat_count_t purged;
mi_stat_count_t page_committed;
mi_stat_count_t segments_abandoned;
mi_stat_count_t pages_abandoned;
@@ -552,6 +593,8 @@ typedef struct mi_stats_s {
mi_stat_counter_t pages_extended;
mi_stat_counter_t mmap_calls;
mi_stat_counter_t commit_calls;
mi_stat_counter_t reset_calls;
mi_stat_counter_t purge_calls;
mi_stat_counter_t page_no_retire;
mi_stat_counter_t searches;
mi_stat_counter_t normal_count;
readme.md (80 lines changed)
@@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
Initially developed by Daan Leijen for the runtime systems of the
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.

Latest release tag: `v2.1.1` (2023-04-03).
Latest stable tag: `v1.8.1` (2023-04-03).
Latest release tag: `v2.1.2` (2023-04-24).
Latest stable tag: `v1.8.2` (2023-04-24).

mimalloc is a drop-in replacement for `malloc` and can be used in other programs
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:

@@ -43,7 +43,7 @@ It also includes a robust way to override the default allocator in [Windows](#ov
and the chance of contending on a single location will be low -- this is quite
similar to randomized algorithms like skip lists where adding
a random oracle removes the need for a more complex algorithm.
- __eager page reset__: when a "page" becomes empty (with increased chance
- __eager page purging__: when a "page" becomes empty (with increased chance
due to free list sharding) the memory is marked to the OS as unused (reset or decommitted)
reducing (real) memory pressure and fragmentation, especially in long running
programs.
@@ -78,6 +78,10 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
(see [below](#performance)); please report if you observe any significant performance regression.

* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity
by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory
usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking.

* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms.

* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision

@@ -104,20 +108,6 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
improved wasm support, faster aligned allocation,
various small fixes.

* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
M1), improved performance for v2 for large objects, Python integration improvements, more standard
installation directories, various small fixes.

* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.

* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).

* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.

* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.

* [Older release notes](#older-release-notes)

Special thanks to:
@@ -279,43 +269,48 @@ completely and redirect all calls to the _mimalloc_ library instead .

## Environment Options

You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)),
or via environment variables:
You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables:

- `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
- `MIMALLOC_VERBOSE=1`: show verbose messages.
- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS
that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server)
programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs.
As an alternative, the `MIMALLOC_RESET_DELAY=`<msecs> can be set higher (100ms by default) to make the page
reset occur less frequently instead of turning it off completely.

Advanced options:

- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge
OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which
can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when
a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher
value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times.
Setting it to `-1` disables purging completely.
- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc
allocates segments and pages. This is by default
only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS)
may improve performance. Note that eager commit only increases the commit but not the actual the peak resident set
(rss) so it is generally ok to enable this.

Further options for large workloads and services:

- `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected
at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than
the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA
nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed).
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
<!--
- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
show in the working set even though usually just a small part is committed to physical memory. This is why it
turned off by default on Windows as it looks not good in the task manager. However, turning it on has no
real drawbacks and may improve performance by a little.
-->
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
startup and sometimes this can give a large (latency) performance improvement on big workloads.
Usually it is better to not use
`MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large
OS pages, use with care as reserving
contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
startup only once).
Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])).
With huge OS pages, it may be beneficial to set the setting
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
and allocate just a little to take up space in the huge OS page area (which cannot be reset).
and allocate just a little to take up space in the huge OS page area (which cannot be purged).
The huge pages are usually allocated evenly among NUMA nodes.
We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
the huge pages at a specific numa node instead.
|
|||
### ETW
|
||||
|
||||
Event tracing for Windows ([ETW]) provides a high performance way to capture all allocations though
|
||||
mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACE_ETW=ON` cmake option.
|
||||
mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACK_ETW=ON` cmake option.
|
||||
|
||||
You can then capture an allocation trace using the Windows performance recorder (WPR), using the
|
||||
`src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use:
|
||||
|
@@ -793,6 +788,16 @@ provided by the bot. You will only need to do this once across all repos using o

# Older Release Notes

* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
M1), improved performance for v2 for large objects, Python integration improvements, more standard
installation directories, various small fixes.
* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).
* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.
* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.

* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,
@ -814,6 +819,7 @@ provided by the bot. You will only need to do this once across all repos using o
|
|||
more eager concurrent free, addition of STL allocator, fixed potential memory leak.
|
||||
* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger
|
||||
free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode.
|
||||
|
||||
* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
|
||||
* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
|
||||
* 2019-10-07, `v1.1.0`: stable release 1.1.
|
||||
|
|
|
@ -79,7 +79,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
|
|||
// for the tracker, on huge aligned allocations only from the start of the large block is defined
|
||||
mi_track_mem_undefined(aligned_p, size);
|
||||
if (zero) {
|
||||
_mi_memzero(aligned_p, mi_usable_size(aligned_p));
|
||||
_mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,21 +93,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
|
|||
static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
|
||||
{
|
||||
// note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size.
|
||||
mi_assert(alignment > 0);
|
||||
if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>)
|
||||
#if MI_DEBUG > 0
|
||||
_mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment);
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
/*
|
||||
if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers)
|
||||
#if MI_DEBUG > 0
|
||||
_mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment);
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
*/
|
||||
|
||||
if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
|
||||
#if MI_DEBUG > 0
|
||||
_mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
|
||||
|
@ -147,9 +139,9 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he
|
|||
}
|
||||
|
||||
mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
|
||||
if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL;
|
||||
#if !MI_PADDING
|
||||
// without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`)
|
||||
if (!_mi_is_power_of_two(alignment)) return NULL;
|
||||
if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)
|
||||
#else
|
||||
// with padding, we can only guarantee this for fixed alignments
|
||||
|
@ -165,6 +157,11 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap,
|
|||
}
|
||||
}
|
||||
|
||||
// ensure a definition is emitted
|
||||
#if defined(__cplusplus)
|
||||
static void* _mi_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned;
|
||||
#endif
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Aligned Allocation
|
||||
// ------------------------------------------------------
|
||||
|
@ -226,19 +223,13 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne
|
|||
return p; // reallocation still fits, is aligned and not more than 50% waste
|
||||
}
|
||||
else {
|
||||
// note: we don't zero allocate upfront so we only zero initialize the expanded part
|
||||
void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset);
|
||||
if (newp != NULL) {
|
||||
if (zero && newsize > size) {
|
||||
const mi_page_t* page = _mi_ptr_page(newp);
|
||||
if (page->is_zero) {
|
||||
// already zero initialized
|
||||
mi_assert_expensive(mi_mem_is_zero(newp,newsize));
|
||||
}
|
||||
else {
|
||||
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
|
||||
size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
|
||||
memset((uint8_t*)newp + start, 0, newsize - start);
|
||||
}
|
||||
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
|
||||
size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
|
||||
_mi_memzero((uint8_t*)newp + start, newsize - start);
|
||||
}
|
||||
_mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
|
||||
mi_free(p); // only free if successful
|
||||
|
|
|
@ -245,11 +245,13 @@ extern "C" {
|
|||
int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
|
||||
|
||||
// `aligned_alloc` is only available when __USE_ISOC11 is defined.
|
||||
// Note: it seems __USE_ISOC11 is not defined in musl (and perhaps other libc's) so we only check
|
||||
// for it if using glibc.
|
||||
// Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot
|
||||
// override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9.
|
||||
// Fortunately, in the case where `aligned_alloc` is declared as `static inline` it
|
||||
// uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves.
|
||||
#if __USE_ISOC11
|
||||
#if !defined(__GLIBC__) || __USE_ISOC11
|
||||
void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -56,7 +56,8 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept
|
|||
// Note: The spec dictates we should not modify `*p` on an error. (issue#27)
|
||||
// <http://man7.org/linux/man-pages/man3/posix_memalign.3.html>
|
||||
if (p == NULL) return EINVAL;
|
||||
if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment
|
||||
if ((alignment % sizeof(void*)) != 0) return EINVAL; // natural alignment
|
||||
// it is also required that alignment is a power of 2 and > 0; this is checked in `mi_malloc_aligned`
|
||||
if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2
|
||||
void* q = mi_malloc_aligned(size, alignment);
|
||||
if (q==NULL && size != 0) return ENOMEM;
|
||||
|
|
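// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this change set): how the posix_memalign
// contract above looks from the caller side -- the alignment must be a power
// of two and a multiple of sizeof(void*), and `*p` is left untouched on error.
// It uses only the public mi_posix_memalign/mi_free from <mimalloc.h>; the
// demo_* name is hypothetical.

#include <stdio.h>
#include <mimalloc.h>

int demo_posix_memalign(void) {
  void* p = NULL;
  int rc = mi_posix_memalign(&p, 64, 1000);   // 64 is a power of two and a multiple of sizeof(void*)
  if (rc != 0) {                              // EINVAL or ENOMEM; `p` was not modified
    fprintf(stderr, "posix_memalign failed: %d\n", rc);
    return rc;
  }
  printf("aligned allocation at %p\n", p);
  mi_free(p);
  return 0;
}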
src/alloc.c
|
@ -37,6 +37,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
|
|||
page->used++;
|
||||
page->free = mi_block_next(page, block);
|
||||
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
|
||||
#if MI_DEBUG>3
|
||||
if (page->free_is_zero) {
|
||||
mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block)));
|
||||
}
|
||||
#endif
|
||||
|
||||
// allow use of the block internally
|
||||
// note: when tracking we need to avoid ever touching the MI_PADDING since
|
||||
|
@ -46,12 +51,18 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
|
|||
// zero the block? note: we need to zero the full block size (issue #63)
|
||||
if mi_unlikely(zero) {
|
||||
mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
|
||||
const size_t zsize = (page->is_zero ? sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size);
|
||||
_mi_memzero_aligned(block, zsize - MI_PADDING_SIZE);
|
||||
mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE);
|
||||
if (page->free_is_zero) {
|
||||
block->next = 0;
|
||||
mi_track_mem_defined(block, page->xblock_size - MI_PADDING_SIZE);
|
||||
}
|
||||
else {
|
||||
_mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
|
||||
if (!page->is_zero && !zero && !mi_page_is_huge(page)) {
|
||||
if (!zero && !mi_page_is_huge(page)) {
|
||||
memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page));
|
||||
}
|
||||
#elif (MI_SECURE!=0)
|
||||
|
@ -110,6 +121,11 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
|
|||
mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
|
||||
}
|
||||
#endif
|
||||
#if MI_DEBUG>3
|
||||
if (p != NULL && zero) {
|
||||
mi_assert_expensive(mi_mem_is_zero(p, size));
|
||||
}
|
||||
#endif
|
||||
return p;
|
||||
}
|
||||
|
||||
|
@ -139,6 +155,11 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z
|
|||
mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
|
||||
}
|
||||
#endif
|
||||
#if MI_DEBUG>3
|
||||
if (p != NULL && zero) {
|
||||
mi_assert_expensive(mi_mem_is_zero(p, size));
|
||||
}
|
||||
#endif
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
@ -691,6 +712,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
|
|||
mi_assert_internal(p!=NULL);
|
||||
// todo: do not track as the usable size is still the same in the free; adjust potential padding?
|
||||
// mi_track_resize(p,size,newsize)
|
||||
// if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); }
|
||||
return p; // reallocation still fits and not more than 50% waste
|
||||
}
|
||||
void* newp = mi_heap_malloc(heap,newsize);
|
||||
|
@ -698,14 +720,15 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
|
|||
if (zero && newsize > size) {
|
||||
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
|
||||
const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
|
||||
memset((uint8_t*)newp + start, 0, newsize - start);
|
||||
_mi_memzero((uint8_t*)newp + start, newsize - start);
|
||||
}
|
||||
else if (newsize == 0) {
|
||||
((uint8_t*)newp)[0] = 0; // work around for applications that expect zero-reallocation to be zero initialized (issue #725)
|
||||
}
|
||||
if mi_likely(p != NULL) {
|
||||
if mi_likely(_mi_is_aligned(p, sizeof(uintptr_t))) { // a client may pass in an arbitrary pointer `p`..
|
||||
const size_t copysize = (newsize > size ? size : newsize);
|
||||
mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking..
|
||||
_mi_memcpy_aligned(newp, p, copysize);
|
||||
}
|
||||
const size_t copysize = (newsize > size ? size : newsize);
|
||||
mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking..
|
||||
_mi_memcpy(newp, p, copysize);
|
||||
mi_free(p); // only free the original pointer if successful
|
||||
}
|
||||
}
|
||||
|
@ -1030,7 +1053,7 @@ void* _mi_externs[] = {
|
|||
(void*)&mi_zalloc_small,
|
||||
(void*)&mi_heap_malloc,
|
||||
(void*)&mi_heap_zalloc,
|
||||
(void*)&mi_heap_malloc_small
|
||||
(void*)&mi_heap_malloc_small,
|
||||
// (void*)&mi_heap_alloc_new,
|
||||
// (void*)&mi_heap_alloc_new_n
|
||||
};
|
||||
|
|
src/arena.c
|
@ -1,5 +1,5 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2019-2022, Microsoft Research, Daan Leijen
|
||||
Copyright (c) 2019-2023, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
|
@ -23,7 +23,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
|
|||
#include "mimalloc/atomic.h"
|
||||
|
||||
#include <string.h> // memset
|
||||
#include <errno.h> // ENOMEM
|
||||
#include <errno.h> // ENOMEM
|
||||
|
||||
#include "bitmap.h" // atomic bitmap
|
||||
|
||||
|
@ -36,22 +36,25 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
|
|||
typedef uintptr_t mi_block_info_t;
|
||||
#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN)
|
||||
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB
|
||||
#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1)
|
||||
#define MI_MAX_ARENAS (112) // not more than 126 (since we use 7 bits in the memid and an arena index + 1)
|
||||
|
||||
// A memory arena descriptor
|
||||
typedef struct mi_arena_s {
|
||||
mi_arena_id_t id; // arena id; 0 for non-specific
|
||||
bool exclusive; // only allow allocations if specifically for this arena
|
||||
mi_memid_t memid; // memid of the memory area
|
||||
_Atomic(uint8_t*) start; // the start of the memory area
|
||||
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
|
||||
size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
|
||||
size_t meta_size; // size of the arena structure itself (including its bitmaps)
|
||||
mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
|
||||
int numa_node; // associated NUMA node
|
||||
bool is_zero_init; // is the arena zero initialized?
|
||||
bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL
|
||||
bool is_large; // large- or huge OS pages (always committed)
|
||||
bool exclusive; // only allow allocations if specifically for this arena
|
||||
bool is_large; // memory area consists of large- or huge OS pages (always committed)
|
||||
_Atomic(size_t) search_idx; // optimization to start the search for free blocks
|
||||
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
|
||||
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
|
||||
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
|
||||
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
|
||||
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
|
||||
} mi_arena_t;
|
||||
|
||||
|
@ -61,9 +64,10 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
|
|||
static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0
|
||||
|
||||
|
||||
//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept;
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Arena id's
|
||||
0 is used for non-arena's (like OS memory)
|
||||
id = arena_index + 1
|
||||
----------------------------------------------------------- */
|
||||
|
||||
|
@ -73,10 +77,7 @@ static size_t mi_arena_id_index(mi_arena_id_t id) {
|
|||
|
||||
static mi_arena_id_t mi_arena_id_create(size_t arena_index) {
|
||||
mi_assert_internal(arena_index < MI_MAX_ARENAS);
|
||||
mi_assert_internal(MI_MAX_ARENAS <= 126);
|
||||
int id = (int)arena_index + 1;
|
||||
mi_assert_internal(id >= 1 && id <= 127);
|
||||
return id;
|
||||
return (int)arena_index + 1;
|
||||
}
|
||||
|
||||
mi_arena_id_t _mi_arena_id_none(void) {
|
||||
|
@ -88,50 +89,123 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus
|
|||
(arena_id == req_arena_id));
|
||||
}
|
||||
|
||||
bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
|
||||
if (memid.memkind == MI_MEM_ARENA) {
|
||||
return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
|
||||
}
|
||||
else {
|
||||
return mi_arena_id_is_suitable(0, false, request_arena_id);
|
||||
}
|
||||
}
|
||||
|
||||
bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) {
|
||||
return (memid.memkind == MI_MEM_OS);
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Arena allocations get a memory id where the lower 8 bits are
|
||||
the arena id, and the upper bits the block index.
|
||||
Arena allocations get a (currently) 16-bit memory id where the
|
||||
lower 8 bits are the arena id, and the upper bits the block index.
|
||||
----------------------------------------------------------- */
|
||||
|
||||
// Use `0` as a special id for direct OS allocated memory.
|
||||
#define MI_MEMID_OS 0
|
||||
|
||||
static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_index_t bitmap_index) {
|
||||
mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
|
||||
mi_assert_internal(id >= 0 && id <= 0x7F);
|
||||
return ((bitmap_index << 8) | ((uint8_t)id & 0x7F) | (exclusive ? 0x80 : 0));
|
||||
}
|
||||
|
||||
static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
|
||||
*bitmap_index = (arena_memid >> 8);
|
||||
mi_arena_id_t id = (int)(arena_memid & 0x7F);
|
||||
*arena_index = mi_arena_id_index(id);
|
||||
return ((arena_memid & 0x80) != 0);
|
||||
}
|
||||
|
||||
bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) {
|
||||
mi_arena_id_t id = (int)(arena_memid & 0x7F);
|
||||
bool exclusive = ((arena_memid & 0x80) != 0);
|
||||
return mi_arena_id_is_suitable(id, exclusive, request_arena_id);
|
||||
}
|
||||
|
||||
bool _mi_arena_is_os_allocated(size_t arena_memid) {
|
||||
return (arena_memid == MI_MEMID_OS);
|
||||
}
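// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this change set): the packed memid layout
// described above in isolation -- exclusive flag in bit 7, arena id in bits
// 0..6, block index in the remaining upper bits. The demo_* names are
// hypothetical and use no mimalloc internals.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

static size_t demo_memid_pack(int id, bool exclusive, size_t block_index) {
  return (block_index << 8) | ((size_t)id & 0x7F) | (exclusive ? 0x80 : 0);
}

static void demo_memid_unpack(size_t memid, int* id, bool* exclusive, size_t* block_index) {
  *id          = (int)(memid & 0x7F);
  *exclusive   = ((memid & 0x80) != 0);
  *block_index = (memid >> 8);
}

static void demo_memid_roundtrip(void) {
  int id; bool exclusive; size_t bidx;
  demo_memid_unpack(demo_memid_pack(3, true, 42), &id, &exclusive, &bidx);
  assert(id == 3 && exclusive && bidx == 42);
}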
|
||||
|
||||
static size_t mi_block_count_of_size(size_t size) {
|
||||
return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static size_t mi_arena_block_size(size_t bcount) {
|
||||
return (bcount * MI_ARENA_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
static size_t mi_arena_size(mi_arena_t* arena) {
|
||||
return mi_arena_block_size(arena->block_count);
|
||||
}
|
||||
|
||||
static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) {
|
||||
mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
|
||||
memid.mem.arena.id = id;
|
||||
memid.mem.arena.block_index = bitmap_index;
|
||||
memid.mem.arena.is_exclusive = is_exclusive;
|
||||
return memid;
|
||||
}
|
||||
|
||||
static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
|
||||
mi_assert_internal(memid.memkind == MI_MEM_ARENA);
|
||||
*arena_index = mi_arena_id_index(memid.mem.arena.id);
|
||||
*bitmap_index = memid.mem.arena.block_index;
|
||||
return memid.mem.arena.is_exclusive;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Special static area for mimalloc internal structures
|
||||
to avoid OS calls (for example, for the arena metadata)
|
||||
----------------------------------------------------------- */
|
||||
|
||||
#define MI_ARENA_STATIC_MAX (MI_INTPTR_SIZE*MI_KiB) // 8 KiB on 64-bit
|
||||
|
||||
static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX];
|
||||
static _Atomic(size_t) mi_arena_static_top;
|
||||
|
||||
static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) {
|
||||
*memid = _mi_memid_none();
|
||||
if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL;
|
||||
if ((mi_atomic_load_relaxed(&mi_arena_static_top) + size) > MI_ARENA_STATIC_MAX) return NULL;
|
||||
|
||||
// try to claim space
|
||||
if (alignment == 0) { alignment = 1; }
|
||||
const size_t oversize = size + alignment - 1;
|
||||
if (oversize > MI_ARENA_STATIC_MAX) return NULL;
|
||||
const size_t oldtop = mi_atomic_add_acq_rel(&mi_arena_static_top, oversize);
|
||||
size_t top = oldtop + oversize;
|
||||
if (top > MI_ARENA_STATIC_MAX) {
|
||||
// try to roll back, ok if this fails
|
||||
mi_atomic_cas_strong_acq_rel(&mi_arena_static_top, &top, oldtop);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// success
|
||||
*memid = _mi_memid_create(MI_MEM_STATIC);
|
||||
const size_t start = _mi_align_up(oldtop, alignment);
|
||||
uint8_t* const p = &mi_arena_static[start];
|
||||
_mi_memzero(p, size);
|
||||
return p;
|
||||
}
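// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this change set): the same "atomic bump
// pointer with rollback" idea as mi_arena_static_zalloc above, written against
// C11 <stdatomic.h> so it compiles standalone. The demo_* names are
// hypothetical, and the alignment is assumed to be a power of two.

#include <stdatomic.h>
#include <stddef.h>
#include <string.h>

#define DEMO_STATIC_MAX  (8 * 1024)
static unsigned char   demo_static_area[DEMO_STATIC_MAX];
static _Atomic size_t  demo_static_top;

static void* demo_static_zalloc(size_t size, size_t alignment) {
  if (size == 0 || size > DEMO_STATIC_MAX) return NULL;
  if (alignment == 0) { alignment = 1; }
  const size_t oversize = size + alignment - 1;               // worst-case slack for alignment
  const size_t oldtop   = atomic_fetch_add(&demo_static_top, oversize);
  size_t top = oldtop + oversize;
  if (top > DEMO_STATIC_MAX) {
    // overshot the area: try to roll back (ok if another thread advanced further)
    atomic_compare_exchange_strong(&demo_static_top, &top, oldtop);
    return NULL;
  }
  const size_t start = (oldtop + alignment - 1) & ~(alignment - 1);  // align up inside the claim
  void* p = &demo_static_area[start];
  memset(p, 0, size);                                          // hand out zero'd memory
  return p;
}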
|
||||
|
||||
static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
|
||||
*memid = _mi_memid_none();
|
||||
|
||||
// try static
|
||||
void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid);
|
||||
if (p != NULL) return p;
|
||||
|
||||
// or fall back to the OS
|
||||
return _mi_os_alloc(size, memid, stats);
|
||||
}
|
||||
|
||||
static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) {
|
||||
if (mi_memkind_is_os(memid.memkind)) {
|
||||
_mi_os_free(p, size, memid, stats);
|
||||
}
|
||||
else {
|
||||
mi_assert(memid.memkind == MI_MEM_STATIC);
|
||||
}
|
||||
}
|
||||
|
||||
static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) {
|
||||
return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex)));
|
||||
}
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Thread safe allocation in an arena
|
||||
----------------------------------------------------------- */
|
||||
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
|
||||
|
||||
// claim the `blocks_inuse` bits
|
||||
static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
|
||||
{
|
||||
size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter
|
||||
if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
|
||||
mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around
|
||||
mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around
|
||||
return true;
|
||||
};
|
||||
return false;
|
||||
|
@ -142,92 +216,116 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
|
|||
Arena Allocation
|
||||
----------------------------------------------------------- */
|
||||
|
||||
static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
|
||||
bool* commit, bool* large, bool* is_pinned, bool* is_zero,
|
||||
mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
|
||||
static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
|
||||
bool commit, mi_memid_t* memid, mi_os_tld_t* tld)
|
||||
{
|
||||
MI_UNUSED(arena_index);
|
||||
mi_assert_internal(mi_arena_id_index(arena->id) == arena_index);
|
||||
if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL;
|
||||
|
||||
mi_bitmap_index_t bitmap_index;
|
||||
if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;
|
||||
if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL;
|
||||
|
||||
// claimed it! set the dirty bits (todo: no need for an atomic op here?)
|
||||
void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
|
||||
*memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index);
|
||||
*is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
|
||||
*large = arena->is_large;
|
||||
*is_pinned = (arena->is_large || !arena->allow_decommit);
|
||||
// claimed it!
|
||||
void* p = mi_arena_block_start(arena, bitmap_index);
|
||||
*memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index);
|
||||
memid->is_pinned = arena->memid.is_pinned;
|
||||
|
||||
// none of the claimed blocks should be scheduled for a decommit
|
||||
if (arena->blocks_purge != NULL) {
|
||||
// this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`).
|
||||
_mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index);
|
||||
}
|
||||
|
||||
// set the dirty bits (todo: no need for an atomic op here?)
|
||||
if (arena->memid.initially_zero && arena->blocks_dirty != NULL) {
|
||||
memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
|
||||
}
|
||||
|
||||
// set commit state
|
||||
if (arena->blocks_committed == NULL) {
|
||||
// always committed
|
||||
*commit = true;
|
||||
memid->initially_committed = true;
|
||||
}
|
||||
else if (*commit) {
|
||||
// arena not committed as a whole, but commit requested: ensure commit now
|
||||
else if (commit) {
|
||||
// commit requested, but the range may not be committed as a whole: ensure it is committed now
|
||||
memid->initially_committed = true;
|
||||
bool any_uncommitted;
|
||||
_mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
|
||||
if (any_uncommitted) {
|
||||
bool commit_zero;
|
||||
_mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats);
|
||||
if (commit_zero) *is_zero = true;
|
||||
bool commit_zero = false;
|
||||
if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) {
|
||||
memid->initially_committed = false;
|
||||
}
|
||||
else {
|
||||
if (commit_zero) { memid->initially_zero = true; }
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// no need to commit, but check if already fully committed
|
||||
*commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
|
||||
memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
// allocate in a specific arena
|
||||
static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment,
|
||||
bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
|
||||
{
|
||||
MI_UNUSED_RELEASE(alignment);
|
||||
mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
|
||||
const size_t bcount = mi_block_count_of_size(size);
|
||||
const size_t arena_index = mi_arena_id_index(arena_id);
|
||||
mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
|
||||
mi_assert_internal(size <= mi_arena_block_size(bcount));
|
||||
|
||||
// Check arena suitability
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
|
||||
if (arena == NULL) return NULL;
|
||||
if (!allow_large && arena->is_large) return NULL;
|
||||
if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL;
|
||||
if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity
|
||||
const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
|
||||
if (match_numa_node) { if (!numa_suitable) return NULL; }
|
||||
else { if (numa_suitable) return NULL; }
|
||||
}
|
||||
|
||||
// try to allocate
|
||||
void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, memid, tld);
|
||||
mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment));
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
// allocate from an arena with fallback to the OS
|
||||
static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large,
|
||||
bool* is_pinned, bool* is_zero,
|
||||
mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld )
|
||||
static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
|
||||
bool commit, bool allow_large,
|
||||
mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
|
||||
{
|
||||
MI_UNUSED(alignment);
|
||||
mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
|
||||
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
|
||||
const size_t bcount = mi_block_count_of_size(size);
|
||||
if mi_likely(max_arena == 0) return NULL;
|
||||
mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE);
|
||||
|
||||
size_t arena_index = mi_arena_id_index(req_arena_id);
|
||||
if (arena_index < MI_MAX_ARENAS) {
|
||||
// try a specific arena if requested
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);
|
||||
if ((arena != NULL) &&
|
||||
(arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
|
||||
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
|
||||
{
|
||||
void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
|
||||
mi_assert_internal((uintptr_t)p % alignment == 0);
|
||||
|
||||
if (req_arena_id != _mi_arena_id_none()) {
|
||||
// try a specific arena if requested
|
||||
if (mi_arena_id_index(req_arena_id) < max_arena) {
|
||||
void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// try numa affine allocation
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena == NULL) break; // end reached
|
||||
if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
|
||||
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
|
||||
{
|
||||
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
|
||||
mi_assert_internal((uintptr_t)p % alignment == 0);
|
||||
if (p != NULL) return p;
|
||||
}
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
}
|
||||
|
||||
// try from another numa node instead..
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena == NULL) break; // end reached
|
||||
if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local!
|
||||
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
|
||||
{
|
||||
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
|
||||
mi_assert_internal((uintptr_t)p % alignment == 0);
|
||||
if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arenas have been tried already
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
}
|
||||
}
|
||||
|
@ -235,75 +333,294 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero,
|
||||
mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
|
||||
// try to reserve a fresh arena space
|
||||
static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id)
|
||||
{
|
||||
mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL);
|
||||
mi_assert_internal(size > 0);
|
||||
*memid = MI_MEMID_OS;
|
||||
*is_zero = false;
|
||||
*is_pinned = false;
|
||||
if (_mi_preloading()) return false; // use OS only while pre loading
|
||||
if (req_arena_id != _mi_arena_id_none()) return false;
|
||||
|
||||
const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count);
|
||||
if (arena_count > (MI_MAX_ARENAS - 4)) return false;
|
||||
|
||||
size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
|
||||
if (arena_reserve == 0) return false;
|
||||
|
||||
if (!_mi_os_has_virtual_reserve()) {
|
||||
arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example)
|
||||
}
|
||||
arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
|
||||
if (arena_count >= 8 && arena_count <= 128) {
|
||||
arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially
|
||||
}
|
||||
if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size
|
||||
|
||||
// commit eagerly?
|
||||
bool arena_commit = false;
|
||||
if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
|
||||
else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
|
||||
|
||||
return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0);
|
||||
}
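// ----------------------------------------------------------------------------
// Worked example (illustrative, not part of this change set): how the
// exponential scaling above grows the reserved arena size as more arenas are
// added, assuming a 256MiB `mi_option_arena_reserve` setting (the actual
// default may differ).

#include <stdio.h>
#include <stddef.h>

static void demo_arena_reserve_growth(void) {
  const size_t base = (size_t)256 * 1024 * 1024;                 // assumed arena_reserve
  for (size_t arena_count = 0; arena_count <= 32; arena_count += 8) {
    size_t reserve = base;
    if (arena_count >= 8 && arena_count <= 128) {
      reserve = ((size_t)1 << (arena_count / 8)) * reserve;      // doubles every 8 arenas
    }
    printf("arena_count=%2zu  ->  reserve %4zu MiB\n", arena_count, reserve / (1024 * 1024));
  }
}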
|
||||
|
||||
|
||||
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large,
|
||||
mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
|
||||
{
|
||||
mi_assert_internal(memid != NULL && tld != NULL);
|
||||
mi_assert_internal(size > 0);
|
||||
*memid = _mi_memid_none();
|
||||
|
||||
bool default_large = false;
|
||||
if (large == NULL) large = &default_large; // ensure `large != NULL`
|
||||
const int numa_node = _mi_os_numa_node(tld); // current numa node
|
||||
|
||||
// try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
|
||||
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
|
||||
void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
|
||||
// otherwise, try to first eagerly reserve a new arena
|
||||
if (req_arena_id == _mi_arena_id_none()) {
|
||||
mi_arena_id_t arena_id = 0;
|
||||
if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
|
||||
// and try allocate in there
|
||||
mi_assert_internal(req_arena_id == _mi_arena_id_none());
|
||||
p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
|
||||
if (p != NULL) return p;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finally, fall back to the OS
|
||||
// if we cannot use OS allocation, return NULL
|
||||
if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) {
|
||||
errno = ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
*is_zero = true;
|
||||
*memid = MI_MEMID_OS;
|
||||
void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats);
|
||||
if (p != NULL) { *is_pinned = *large; }
|
||||
return p;
|
||||
|
||||
// finally, fall back to the OS
|
||||
if (align_offset > 0) {
|
||||
return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
|
||||
}
|
||||
else {
|
||||
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
|
||||
}
|
||||
}
|
||||
|
||||
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
|
||||
void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
|
||||
{
|
||||
return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
|
||||
return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld);
|
||||
}
|
||||
|
||||
|
||||
void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
|
||||
if (size != NULL) *size = 0;
|
||||
size_t arena_index = mi_arena_id_index(arena_id);
|
||||
if (arena_index >= MI_MAX_ARENAS) return NULL;
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
|
||||
if (arena == NULL) return NULL;
|
||||
if (size != NULL) *size = arena->block_count * MI_ARENA_BLOCK_SIZE;
|
||||
if (size != NULL) { *size = mi_arena_block_size(arena->block_count); }
|
||||
return arena->start;
|
||||
}
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Arena purge
|
||||
----------------------------------------------------------- */
|
||||
|
||||
static long mi_arena_purge_delay(void) {
|
||||
// <0 = no purging allowed, 0=immediate purging, >0=milli-second delay
|
||||
return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult));
|
||||
}
|
||||
|
||||
// reset or decommit in an arena and update the committed/decommit bitmaps
|
||||
// assumes we own the area (i.e. blocks_in_use is claimed by us)
|
||||
static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) {
|
||||
mi_assert_internal(arena->blocks_committed != NULL);
|
||||
mi_assert_internal(arena->blocks_purge != NULL);
|
||||
mi_assert_internal(!arena->memid.is_pinned);
|
||||
const size_t size = mi_arena_block_size(blocks);
|
||||
void* const p = mi_arena_block_start(arena, bitmap_idx);
|
||||
bool needs_recommit;
|
||||
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
|
||||
// all blocks are committed, we can purge freely
|
||||
needs_recommit = _mi_os_purge(p, size, stats);
|
||||
}
|
||||
else {
|
||||
// some blocks are not committed -- this can happen when a partially committed block is freed
|
||||
// in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
|
||||
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
|
||||
// and also undo the decommit stats (as it was already adjusted)
|
||||
mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
|
||||
needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats);
|
||||
_mi_stat_increase(&stats->committed, size);
|
||||
}
|
||||
|
||||
// clear the purged blocks
|
||||
_mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx);
|
||||
// update committed bitmap
|
||||
if (needs_recommit) {
|
||||
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
|
||||
}
|
||||
}
|
||||
|
||||
// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls.
|
||||
// Note: assumes we (still) own the area as we may purge immediately
|
||||
static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) {
|
||||
mi_assert_internal(arena->blocks_purge != NULL);
|
||||
const long delay = mi_arena_purge_delay();
|
||||
if (delay < 0) return; // is purging allowed at all?
|
||||
|
||||
if (_mi_preloading() || delay == 0) {
|
||||
// decommit directly
|
||||
mi_arena_purge(arena, bitmap_idx, blocks, stats);
|
||||
}
|
||||
else {
|
||||
// schedule decommit
|
||||
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
|
||||
if (expire != 0) {
|
||||
mi_atomic_addi64_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay
|
||||
}
|
||||
else {
|
||||
mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay);
|
||||
}
|
||||
_mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
// purge a range of blocks
|
||||
// return true if the full range was purged.
|
||||
// assumes we own the area (i.e. blocks_in_use is claimed by us)
|
||||
static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) {
|
||||
const size_t endidx = startidx + bitlen;
|
||||
size_t bitidx = startidx;
|
||||
bool all_purged = false;
|
||||
while (bitidx < endidx) {
|
||||
// count consecutive ones in the purge mask
|
||||
size_t count = 0;
|
||||
while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) {
|
||||
count++;
|
||||
}
|
||||
if (count > 0) {
|
||||
// found range to be purged
|
||||
const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx);
|
||||
mi_arena_purge(arena, range_idx, count, stats);
|
||||
if (count == bitlen) {
|
||||
all_purged = true;
|
||||
}
|
||||
}
|
||||
bitidx += (count+1); // +1 to skip the zero bit (or end)
|
||||
}
|
||||
return all_purged;
|
||||
}
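// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this change set): the run-scanning loop
// above in isolation -- walk a purge mask and report every maximal run of
// 1-bits, which is what drives the per-range purge calls. The demo_* name is
// hypothetical.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static void demo_scan_purge_runs(uint64_t mask) {
  size_t bitidx = 0;
  while (bitidx < 64) {
    size_t count = 0;
    while (bitidx + count < 64 && (mask & ((uint64_t)1 << (bitidx + count))) != 0) {
      count++;
    }
    if (count > 0) {
      printf("purge run of %zu blocks starting at bit %zu\n", count, bitidx);
    }
    bitidx += (count + 1);    // +1 skips the zero bit (or the end)
  }
}

// demo_scan_purge_runs(0x00F0F00F) reports runs at bits 0..3, 12..15, and 20..23.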
|
||||
|
||||
// returns true if anything was purged
|
||||
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
|
||||
{
|
||||
if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
|
||||
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
|
||||
if (expire == 0) return false;
|
||||
if (!force && expire > now) return false;
|
||||
|
||||
// reset expire (if not already set concurrently)
|
||||
mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0);
|
||||
|
||||
// potential purges scheduled, walk through the bitmap
|
||||
bool any_purged = false;
|
||||
bool full_purge = true;
|
||||
for (size_t i = 0; i < arena->field_count; i++) {
|
||||
size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]);
|
||||
if (purge != 0) {
|
||||
size_t bitidx = 0;
|
||||
while (bitidx < MI_BITMAP_FIELD_BITS) {
|
||||
// find consecutive range of ones in the purge mask
|
||||
size_t bitlen = 0;
|
||||
while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) {
|
||||
bitlen++;
|
||||
}
|
||||
// try to claim the longest range of corresponding in_use bits
|
||||
const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx);
|
||||
while( bitlen > 0 ) {
|
||||
if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) {
|
||||
break;
|
||||
}
|
||||
bitlen--;
|
||||
}
|
||||
// actual claimed bits at `in_use`
|
||||
if (bitlen > 0) {
|
||||
// read purge again now that we have the in_use bits
|
||||
purge = mi_atomic_load_acquire(&arena->blocks_purge[i]);
|
||||
if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) {
|
||||
full_purge = false;
|
||||
}
|
||||
any_purged = true;
|
||||
// release the claimed `in_use` bits again
|
||||
_mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index);
|
||||
}
|
||||
bitidx += (bitlen+1); // +1 to skip the zero (or end)
|
||||
} // while bitidx
|
||||
} // purge != 0
|
||||
}
|
||||
// if not fully purged, make sure to purge again in the future
|
||||
if (!full_purge) {
|
||||
const long delay = mi_arena_purge_delay();
|
||||
mi_msecs_t expected = 0;
|
||||
mi_atomic_casi64_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay);
|
||||
}
|
||||
return any_purged;
|
||||
}
|
||||
|
||||
static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) {
|
||||
if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled
|
||||
|
||||
const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count);
|
||||
if (max_arena == 0) return;
|
||||
|
||||
// allow only one thread to purge at a time
|
||||
static mi_atomic_guard_t purge_guard;
|
||||
mi_atomic_guard(&purge_guard)
|
||||
{
|
||||
mi_msecs_t now = _mi_clock_now();
|
||||
size_t max_purge_count = (visit_all ? max_arena : 1);
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena != NULL) {
|
||||
if (mi_arena_try_purge(arena, now, force, stats)) {
|
||||
if (max_purge_count <= 1) break;
|
||||
max_purge_count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Arena free
|
||||
----------------------------------------------------------- */
|
||||
|
||||
void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats) {
|
||||
void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
|
||||
mi_assert_internal(size > 0 && stats != NULL);
|
||||
mi_assert_internal(committed_size <= size);
|
||||
if (p==NULL) return;
|
||||
if (size==0) return;
|
||||
|
||||
if (memid == MI_MEMID_OS) {
|
||||
const bool all_committed = (committed_size == size);
|
||||
|
||||
if (mi_memkind_is_os(memid.memkind)) {
|
||||
// was a direct OS allocation, pass through
|
||||
_mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats);
|
||||
if (!all_committed && committed_size > 0) {
|
||||
// if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size)
|
||||
_mi_stat_decrease(&stats->committed, committed_size);
|
||||
}
|
||||
_mi_os_free(p, size, memid, stats);
|
||||
}
|
||||
else {
|
||||
else if (memid.memkind == MI_MEM_ARENA) {
|
||||
// allocated in an arena
|
||||
mi_assert_internal(align_offset == 0);
|
||||
size_t arena_idx;
|
||||
size_t bitmap_idx;
|
||||
mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx);
|
||||
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]);
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]);
|
||||
mi_assert_internal(arena != NULL);
|
||||
const size_t blocks = mi_block_count_of_size(size);
|
||||
|
||||
// checks
|
||||
if (arena == NULL) {
|
||||
_mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
|
||||
|
@ -314,24 +631,100 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset,
|
|||
_mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
|
||||
return;
|
||||
}
|
||||
|
||||
// need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
|
||||
mi_track_mem_undefined(p,size);
|
||||
|
||||
// potentially decommit
|
||||
if (!arena->allow_decommit || arena->blocks_committed == NULL) {
|
||||
mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c)
|
||||
if (arena->memid.is_pinned || arena->blocks_committed == NULL) {
|
||||
mi_assert_internal(all_committed);
|
||||
}
|
||||
else {
|
||||
mi_assert_internal(arena->blocks_committed != NULL);
|
||||
_mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails
|
||||
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
|
||||
mi_assert_internal(arena->blocks_purge != NULL);
|
||||
|
||||
if (!all_committed) {
|
||||
// mark the entire range as no longer committed (so we recommit the full range when re-using)
|
||||
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
|
||||
mi_track_mem_noaccess(p,size);
|
||||
if (committed_size > 0) {
|
||||
// if partially committed, adjust the committed stats (as it will be recommitted when re-using)
|
||||
// in the delayed purge, we now need to not count a decommit if the range is not marked as committed.
|
||||
_mi_stat_decrease(&stats->committed, committed_size);
|
||||
}
|
||||
// note: if not all committed, it may be that the purge will reset/decommit the entire range
|
||||
// that contains already decommitted parts. Since purge consistently uses reset or decommit that
|
||||
// works (as we should never reset decommitted parts).
|
||||
}
|
||||
// (delay) purge the entire range
|
||||
mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats);
|
||||
}
|
||||
|
||||
// and make it available to others again
|
||||
bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
|
||||
if (!all_inuse) {
|
||||
_mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size);
|
||||
_mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size);
|
||||
return;
|
||||
};
|
||||
}
|
||||
else {
|
||||
// arena was none, external, or static; nothing to do
|
||||
mi_assert_internal(memid.memkind < MI_MEM_OS);
|
||||
}
|
||||
|
||||
// purge expired decommits
|
||||
mi_arenas_try_purge(false, false, stats);
|
||||
}
|
||||
|
||||
// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
|
||||
// for dynamic libraries that are unloaded and need to release all their allocated memory.
|
||||
static void mi_arenas_unsafe_destroy(void) {
|
||||
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
|
||||
size_t new_max_arena = 0;
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena != NULL) {
|
||||
if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
|
||||
mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
|
||||
_mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
|
||||
}
|
||||
else {
|
||||
new_max_arena = i;
|
||||
}
|
||||
mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size, &_mi_stats_main);
|
||||
}
|
||||
}
|
||||
|
||||
// try to lower the max arena.
|
||||
size_t expected = max_arena;
|
||||
mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena);
|
||||
}
|
||||
|
||||
// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
|
||||
void _mi_arena_collect(bool force_purge, mi_stats_t* stats) {
|
||||
mi_arenas_try_purge(force_purge, true /* visit all */, stats);
|
||||
}
|
||||
|
||||
// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
|
||||
// for dynamic libraries that are unloaded and need to release all their allocated memory.
|
||||
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
|
||||
mi_arenas_unsafe_destroy();
|
||||
_mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas
|
||||
}
|
||||
|
||||
// Is a pointer inside any of our arenas?
|
||||
bool _mi_arena_contains(const void* p) {
|
||||
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
|
||||
for (size_t i = 0; i < max_arena; i++) {
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Add an arena.
|
||||
----------------------------------------------------------- */
|
||||
|
@ -340,53 +733,58 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) {
|
|||
mi_assert_internal(arena != NULL);
|
||||
mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
|
||||
mi_assert_internal(arena->block_count > 0);
|
||||
if (arena_id != NULL) *arena_id = -1;
|
||||
if (arena_id != NULL) { *arena_id = -1; }
|
||||
|
||||
size_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
|
||||
if (i >= MI_MAX_ARENAS) {
|
||||
mi_atomic_decrement_acq_rel(&mi_arena_count);
|
||||
return false;
|
||||
}
|
||||
mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
|
||||
arena->id = mi_arena_id_create(i);
|
||||
if (arena_id != NULL) *arena_id = arena->id;
|
||||
mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
|
||||
if (arena_id != NULL) { *arena_id = arena->id; }
|
||||
return true;
|
||||
}
|
||||
|
||||
bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept
|
||||
static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept
|
||||
{
|
||||
if (arena_id != NULL) *arena_id = _mi_arena_id_none();
|
||||
if (size < MI_ARENA_BLOCK_SIZE) return false;
|
||||
|
||||
if (is_large) {
|
||||
mi_assert_internal(is_committed);
|
||||
is_committed = true;
|
||||
mi_assert_internal(memid.initially_committed && memid.is_pinned);
|
||||
}
|
||||
|
||||
const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
|
||||
const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
|
||||
const size_t bitmaps = (is_committed ? 2 : 3);
|
||||
const size_t bitmaps = (memid.is_pinned ? 2 : 4);
|
||||
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
|
||||
mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
|
||||
mi_memid_t meta_memid;
|
||||
mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
|
||||
if (arena == NULL) return false;
|
||||
|
||||
|
||||
// already zero'd due to os_alloc
|
||||
// _mi_memzero(arena, asize);
|
||||
arena->id = _mi_arena_id_none();
|
||||
arena->memid = memid;
|
||||
arena->exclusive = exclusive;
|
||||
arena->meta_size = asize;
|
||||
arena->meta_memid = meta_memid;
|
||||
arena->block_count = bcount;
|
||||
arena->field_count = fields;
|
||||
arena->start = (uint8_t*)start;
|
||||
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
|
||||
arena->is_large = is_large;
|
||||
arena->is_zero_init = is_zero;
|
||||
arena->allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory
|
||||
arena->purge_expire = 0;
|
||||
arena->search_idx = 0;
|
||||
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
|
||||
arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
|
||||
// the bitmaps are already zero initialized due to os_alloc
|
||||
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
|
||||
arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
|
||||
// initialize committed bitmap?
|
||||
if (arena->blocks_committed != NULL && is_committed) {
|
||||
if (arena->blocks_committed != NULL && arena->memid.initially_committed) {
|
||||
memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
|
||||
}
|
||||
|
||||
// and claim leftover blocks if needed (so we never allocate there)
|
||||
ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
|
||||
mi_assert_internal(post >= 0);
|
||||
|
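A small standalone sketch of the bitmap layout set up above: the in-use bitmap is the first trailing array, and the dirty, committed and purge bitmaps follow at multiples of `fields` words. The struct and names here (`arena_t`, `FIELD_BITS`, `arena_create`) are invented for illustration and are not the mimalloc definitions.

    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef size_t bitmap_field_t;
    #define FIELD_BITS (8*sizeof(bitmap_field_t))

    typedef struct arena_s {
      size_t block_count;
      size_t field_count;
      bitmap_field_t* blocks_dirty;
      bitmap_field_t* blocks_committed;  // NULL when the memory is pinned
      bitmap_field_t* blocks_purge;      // NULL when the memory is pinned
      bitmap_field_t  blocks_inuse[];    // trailing bitmaps start here
    } arena_t;

    static arena_t* arena_create(size_t bcount, int is_pinned) {
      size_t fields  = (bcount + FIELD_BITS - 1) / FIELD_BITS;
      size_t bitmaps = (is_pinned ? 2 : 4);   // inuse+dirty, plus committed+purge
      arena_t* arena = calloc(1, sizeof(arena_t) + bitmaps*fields*sizeof(bitmap_field_t));
      if (arena == NULL) return NULL;
      arena->block_count      = bcount;
      arena->field_count      = fields;
      arena->blocks_dirty     = &arena->blocks_inuse[fields];                        // just after inuse
      arena->blocks_committed = (is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // after dirty
      arena->blocks_purge     = (is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // after committed
      return arena;
    }

    int main(void) {
      arena_t* a = arena_create(100, 0);
      if (a == NULL) return 1;
      printf("fields=%zu, committed bitmap at word offset %zu\n",
             a->field_count, (size_t)(a->blocks_committed - a->blocks_inuse));
      free(a);
      return 0;
    }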
@ -395,32 +793,42 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is
|
|||
mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
|
||||
_mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
|
||||
}
|
||||
|
||||
return mi_arena_add(arena, arena_id);
|
||||
|
||||
}
|
||||
|
||||
bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
|
||||
mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL);
|
||||
memid.initially_committed = is_committed;
|
||||
memid.initially_zero = is_zero;
|
||||
memid.is_pinned = is_large;
|
||||
return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id);
|
||||
}
|
||||
|
||||
// Reserve a range of regular OS memory
|
||||
int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept
|
||||
{
|
||||
int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
|
||||
if (arena_id != NULL) *arena_id = _mi_arena_id_none();
|
||||
size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
|
||||
bool large = allow_large;
|
||||
void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main);
|
||||
if (start==NULL) return ENOMEM;
|
||||
if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) {
|
||||
_mi_os_free_ex(start, size, commit, &_mi_stats_main);
|
||||
_mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024));
|
||||
mi_memid_t memid;
|
||||
void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main);
|
||||
if (start == NULL) return ENOMEM;
|
||||
const bool is_large = memid.is_pinned; // todo: use separate is_large field?
|
||||
if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) {
|
||||
_mi_os_free_ex(start, size, commit, memid, &_mi_stats_main);
|
||||
_mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024));
|
||||
return ENOMEM;
|
||||
}
|
||||
_mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : "");
|
||||
_mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : "");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Manage a range of regular OS memory
|
||||
bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept {
|
||||
return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false, NULL);
|
||||
return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL);
|
||||
}
|
||||
|
||||
// Reserve a range of regular OS memory
|
||||
int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept {
|
||||
return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL);
|
||||
}
|
||||
|
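A possible usage sketch for the wrappers above: register an externally allocated region as an exclusive arena. It assumes a Linux/mmap environment and the public `mi_manage_os_memory_ex` declaration shown in this diff; the 256 MiB size is arbitrary.

    #include <mimalloc.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void) {
      size_t size = 256 * 1024 * 1024;   // needs to be at least one arena block
      void* start = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (start == MAP_FAILED) return 1;
      mi_arena_id_t arena_id;
      if (!mi_manage_os_memory_ex(start, size, true /*committed*/, false /*large*/,
                                  false /*zero*/, -1 /*numa*/, true /*exclusive*/, &arena_id)) {
        fprintf(stderr, "failed to register the region as an arena\n");
        return 1;
      }
      printf("registered a %zu MiB exclusive arena\n", size / (1024*1024));
      return 0;
    }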
@ -470,15 +878,16 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m
|
|||
if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
|
||||
size_t hsize = 0;
|
||||
size_t pages_reserved = 0;
|
||||
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
|
||||
mi_memid_t memid;
|
||||
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid);
|
||||
if (p==NULL || pages_reserved==0) {
|
||||
_mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
|
||||
return ENOMEM;
|
||||
}
|
||||
_mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);
|
||||
|
||||
if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) {
|
||||
_mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
|
||||
if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) {
|
||||
_mi_os_free(p, hsize, memid, &_mi_stats_main);
|
||||
return ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
|
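A usage sketch for the huge-page path above, assuming the public `mi_reserve_huge_os_pages_at` API; the page count, NUMA node and timeout are arbitrary, and the OS must have 1 GiB huge pages configured.

    #include <mimalloc.h>
    #include <stdio.h>

    int main(void) {
      int err = mi_reserve_huge_os_pages_at(4 /*1GiB pages*/, 0 /*numa node*/, 2000 /*ms timeout*/);
      if (err != 0) fprintf(stderr, "could not reserve huge pages (error %d)\n", err);
      return 0;
    }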
@ -524,3 +933,4 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
|
|||
if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
98 src/bitmap.c
@ -1,5 +1,5 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2019-2021 Microsoft Research, Daan Leijen
|
||||
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
|
@ -11,7 +11,6 @@ represented as an array of fields where each field is a machine word (`size_t`)
|
|||
|
||||
There are two api's; the standard one cannot have sequences that cross
|
||||
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
|
||||
(this is used in region allocation)
|
||||
|
||||
The `_across` postfixed functions do allow sequences that can cross over
|
||||
between the fields. (This is used in arena allocation)
|
||||
|
@ -63,12 +62,12 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
|
|||
|
||||
// scan linearly for a free range of zero bits
|
||||
while (bitidx <= bitidx_max) {
|
||||
const size_t mapm = map & m;
|
||||
const size_t mapm = (map & m);
|
||||
if (mapm == 0) { // are the mask bits free at bitidx?
|
||||
mi_assert_internal((m >> bitidx) == mask); // no overflow?
|
||||
const size_t newmap = map | m;
|
||||
const size_t newmap = (map | m);
|
||||
mi_assert_internal((newmap^map) >> bitidx == mask);
|
||||
if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here?
|
||||
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
|
||||
// no success, another thread claimed concurrently.. keep going (with updated `map`)
|
||||
continue;
|
||||
}
|
||||
|
@ -81,7 +80,8 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
|
|||
else {
|
||||
// on to the next bit range
|
||||
#ifdef MI_HAVE_FAST_BITSCAN
|
||||
const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1);
|
||||
mi_assert_internal(mapm != 0);
|
||||
const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
|
||||
mi_assert_internal(shift > 0 && shift <= count);
|
||||
#else
|
||||
const size_t shift = 1;
|
||||
|
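The fast-bitscan path above advances the scan to just past the highest blocking bit in the candidate range. A standalone sketch of the same computation, assuming a GCC/Clang `__builtin_clzl` on a 64-bit target (the names here are invented):

    #include <stdio.h>
    #include <stddef.h>

    #define FIELD_BITS (8*sizeof(size_t))

    // `mapm` is the non-zero set of blocking bits inside the candidate mask
    static size_t skip_shift(size_t mapm, size_t bitidx, size_t count) {
      if (count == 1) return 1;
      size_t highest = FIELD_BITS - 1 - (size_t)__builtin_clzl(mapm);  // index of the highest blocking bit
      return highest - bitidx + 1;                                     // step just past it
    }

    int main(void) {
      size_t map  = ((size_t)1 << 5) | ((size_t)1 << 9);      // bits 5 and 9 are taken
      size_t mask = (size_t)0xF << 4;                         // looking for 4 free bits at index 4
      printf("shift = %zu\n", skip_shift(map & mask, 4, 4));  // prints 2: resume the scan at bit 6
      return 0;
    }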
@ -100,7 +100,7 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
|
|||
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
|
||||
size_t idx = start_field_idx;
|
||||
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
|
||||
if (idx >= bitmap_fields) idx = 0; // wrap
|
||||
if (idx >= bitmap_fields) { idx = 0; } // wrap
|
||||
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -127,14 +127,6 @@ bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
|
||||
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
|
||||
bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) {
|
||||
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx);
|
||||
}
|
||||
*/
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 0 atomically
|
||||
// Returns `true` if all `count` bits were 1 previously.
|
||||
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
|
@ -143,7 +135,7 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count,
|
|||
const size_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
// mi_assert_internal((bitmap[idx] & mask) == mask);
|
||||
size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
|
||||
const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
|
||||
return ((prev & mask) == mask);
|
||||
}
|
||||
|
||||
|
@ -157,7 +149,7 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi
|
|||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
|
||||
size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
|
||||
if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
|
||||
if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
|
||||
return ((prev & mask) == 0);
|
||||
}
|
||||
|
||||
|
@ -167,11 +159,28 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size
|
|||
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
|
||||
const size_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
|
||||
if (any_ones != NULL) *any_ones = ((field & mask) != 0);
|
||||
const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
|
||||
if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
|
||||
return ((field & mask) == mask);
|
||||
}
|
||||
|
||||
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
|
||||
// Returns `true` if successful when all previous `count` bits were 0.
|
||||
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
const size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
|
||||
const size_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
|
||||
do {
|
||||
if ((expected & mask) != 0) return false;
|
||||
}
|
||||
while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
|
||||
mi_assert_internal((expected & mask) == 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
|
||||
}
|
||||
|
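The new `_mi_bitmap_try_claim` follows the usual compare-and-swap loop: fail as soon as any bit under the mask is set, otherwise set all of them atomically. A standalone sketch of that loop with C11 atomics (not the mimalloc types):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    static bool try_claim(_Atomic(size_t)* field, size_t mask) {
      size_t expected = atomic_load_explicit(field, memory_order_relaxed);
      do {
        if ((expected & mask) != 0) return false;   // some bit is already claimed
      } while (!atomic_compare_exchange_strong(field, &expected, expected | mask));
      return true;                                  // all bits were 0 and are now 1
    }

    int main(void) {
      _Atomic(size_t) field = 0;
      size_t mask = 0x0F;                           // claim bits 0..3
      printf("%d %d\n", try_claim(&field, mask), try_claim(&field, mask));  // prints: 1 0
      return 0;
    }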
@ -190,6 +199,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
|
|||
|
||||
// Try to atomically claim a sequence of `count` bits starting from the field
|
||||
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
|
||||
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
|
||||
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx)
|
||||
{
|
||||
mi_assert_internal(bitmap_idx != NULL);
|
||||
|
@ -200,9 +210,9 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
|
|||
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
|
||||
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
|
||||
if (initial == 0) return false;
|
||||
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields
|
||||
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
|
||||
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
|
||||
|
||||
|
||||
// scan ahead
|
||||
size_t found = initial;
|
||||
size_t mask = 0; // mask bits for the final field
|
||||
|
@ -210,25 +220,27 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
|
|||
field++;
|
||||
map = mi_atomic_load_relaxed(field);
|
||||
const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
|
||||
mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
|
||||
mask = mi_bitmap_mask_(mask_bits, 0);
|
||||
if ((map & mask) != 0) return false;
|
||||
if ((map & mask) != 0) return false; // some part is already claimed
|
||||
found += mask_bits;
|
||||
}
|
||||
mi_assert_internal(field < &bitmap[bitmap_fields]);
|
||||
|
||||
// found range of zeros up to the final field; mask contains mask in the final field
|
||||
// now claim it atomically
|
||||
// we found a range of contiguous zeros up to the final field; mask contains mask in the final field
|
||||
// now try to claim the range atomically
|
||||
mi_bitmap_field_t* const final_field = field;
|
||||
const size_t final_mask = mask;
|
||||
mi_bitmap_field_t* const initial_field = &bitmap[idx];
|
||||
const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial);
|
||||
const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
|
||||
const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
|
||||
|
||||
// initial field
|
||||
size_t newmap;
|
||||
field = initial_field;
|
||||
map = mi_atomic_load_relaxed(field);
|
||||
do {
|
||||
newmap = map | initial_mask;
|
||||
newmap = (map | initial_mask);
|
||||
if ((map & initial_mask) != 0) { goto rollback; };
|
||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
|
||||
|
@ -243,31 +255,32 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
|
|||
mi_assert_internal(field == final_field);
|
||||
map = mi_atomic_load_relaxed(field);
|
||||
do {
|
||||
newmap = map | final_mask;
|
||||
newmap = (map | final_mask);
|
||||
if ((map & final_mask) != 0) { goto rollback; }
|
||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
|
||||
// claimed!
|
||||
*bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial);
|
||||
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
|
||||
return true;
|
||||
|
||||
rollback:
|
||||
// roll back intermediate fields
|
||||
// (we just failed to claim `field` so decrement first)
|
||||
while (--field > initial_field) {
|
||||
newmap = 0;
|
||||
map = MI_BITMAP_FIELD_FULL;
|
||||
mi_assert_internal(mi_atomic_load_relaxed(field) == map);
|
||||
mi_atomic_store_release(field, newmap);
|
||||
}
|
||||
if (field == initial_field) {
|
||||
if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
|
||||
map = mi_atomic_load_relaxed(field);
|
||||
do {
|
||||
mi_assert_internal((map & initial_mask) == initial_mask);
|
||||
newmap = map & ~initial_mask;
|
||||
newmap = (map & ~initial_mask);
|
||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
}
|
||||
// retry? (we make a recursive call instead of goto to be able to use const declarations)
|
||||
if (retries < 4) {
|
||||
if (retries <= 2) {
|
||||
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx);
|
||||
}
|
||||
else {
|
||||
|
@ -280,17 +293,22 @@ rollback:
|
|||
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
|
||||
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
|
||||
mi_assert_internal(count > 0);
|
||||
if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
|
||||
if (count <= 2) {
|
||||
// we don't bother with crossover fields for small counts
|
||||
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
|
||||
}
|
||||
|
||||
// visit the fields
|
||||
size_t idx = start_field_idx;
|
||||
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
|
||||
if (idx >= bitmap_fields) idx = 0; // wrap
|
||||
// try to claim inside the field
|
||||
if (idx >= bitmap_fields) { idx = 0; } // wrap
|
||||
// first try to claim inside a field
|
||||
if (count <= MI_BITMAP_FIELD_BITS) {
|
||||
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// try to claim across fields
|
||||
// if that fails, then try to claim across fields
|
||||
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -333,14 +351,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
|
|||
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
|
||||
bool all_one = true;
|
||||
mi_bitmap_field_t* field = &bitmap[idx];
|
||||
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask);
|
||||
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
|
||||
if ((prev & pre_mask) != pre_mask) all_one = false;
|
||||
while(mid_count-- > 0) {
|
||||
prev = mi_atomic_and_acq_rel(field++, ~mid_mask);
|
||||
prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
|
||||
if ((prev & mid_mask) != mid_mask) all_one = false;
|
||||
}
|
||||
if (post_mask!=0) {
|
||||
prev = mi_atomic_and_acq_rel(field, ~post_mask);
|
||||
prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
|
||||
if ((prev & post_mask) != post_mask) all_one = false;
|
||||
}
|
||||
return all_one;
|
||||
|
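The `_across` variants split a bit run into a pre-mask in the first field, zero or more full middle fields, and a post-mask in the last field. A standalone sketch of that split with invented names; the printed values assume a 64-bit `size_t`.

    #include <stdio.h>
    #include <stddef.h>

    #define FIELD_BITS (8*sizeof(size_t))
    #define FIELD_FULL (~(size_t)0)

    static size_t mask_of(size_t count, size_t bitidx) {
      return (count >= FIELD_BITS ? FIELD_FULL : ((((size_t)1 << count) - 1) << bitidx));
    }

    int main(void) {
      size_t bitidx = 60, count = 70;                    // run starts near the end of a field
      size_t pre_count = FIELD_BITS - bitidx;            // 4 bits left in the first field
      if (pre_count > count) pre_count = count;
      size_t pre_mask  = mask_of(pre_count, bitidx);
      size_t rest      = count - pre_count;              // 66 bits remaining
      size_t mid_count = rest / FIELD_BITS;              // 1 full middle field
      size_t post_mask = mask_of(rest % FIELD_BITS, 0);  // 2 bits in the final field
      printf("pre=0x%zx mid_fields=%zu post=0x%zx\n", pre_mask, mid_count, post_mask);
      return 0;
    }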
@ -370,7 +388,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
|
|||
if ((prev & post_mask) != 0) all_zero = false;
|
||||
if ((prev & post_mask) != post_mask) any_zero = true;
|
||||
}
|
||||
if (pany_zero != NULL) *pany_zero = any_zero;
|
||||
if (pany_zero != NULL) { *pany_zero = any_zero; }
|
||||
return all_zero;
|
||||
}
|
||||
|
||||
|
@ -399,7 +417,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field
|
|||
if ((prev & post_mask) != post_mask) all_ones = false;
|
||||
if ((prev & post_mask) != 0) any_ones = true;
|
||||
}
|
||||
if (pany_ones != NULL) *pany_ones = any_ones;
|
||||
if (pany_ones != NULL) { *pany_ones = any_ones; }
|
||||
return all_ones;
|
||||
}
|
||||
|
src/bitmap.h
|
@ -80,6 +80,10 @@ bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap
|
|||
// Returns `true` if all `count` bits were 1 previously.
|
||||
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
|
||||
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
|
||||
// Returns `true` if successful when all previous `count` bits were 0.
|
||||
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
|
||||
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
|
||||
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
|
||||
|
22 src/heap.c
@ -154,8 +154,8 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
|
|||
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
|
||||
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
|
||||
|
||||
// collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list)
|
||||
// note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment
|
||||
// collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list)
|
||||
// note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment
|
||||
_mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments);
|
||||
|
||||
// collect segment local caches
|
||||
|
@ -163,13 +163,10 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
|
|||
_mi_segment_thread_collect(&heap->tld->segments);
|
||||
}
|
||||
|
||||
// decommit in global segment caches
|
||||
// note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment
|
||||
_mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os);
|
||||
|
||||
// collect regions on program-exit (or shared library unload)
|
||||
if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
|
||||
//_mi_mem_collect(&heap->tld->os);
|
||||
_mi_thread_data_collect(); // collect thread data cache
|
||||
_mi_arena_collect(true /* force purge */, &heap->tld->stats);
|
||||
}
|
||||
}
|
||||
|
||||
|
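A usage sketch of triggering the forced collection path above from application code, assuming the public `mi_collect` and `mi_stats_print` APIs:

    #include <mimalloc.h>
    #include <stdbool.h>

    int main(void) {
      void* p = mi_malloc(1 << 20);
      mi_free(p);
      mi_collect(true);        // force: on the main thread this also purges arenas and cached thread metadata
      mi_stats_print(NULL);
      return 0;
    }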
@ -209,16 +206,16 @@ mi_heap_t* mi_heap_get_backing(void) {
|
|||
return bheap;
|
||||
}
|
||||
|
||||
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) {
|
||||
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
|
||||
mi_heap_t* bheap = mi_heap_get_backing();
|
||||
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
|
||||
if (heap==NULL) return NULL;
|
||||
if (heap == NULL) return NULL;
|
||||
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
|
||||
heap->tld = bheap->tld;
|
||||
heap->thread_id = _mi_thread_id();
|
||||
heap->arena_id = arena_id;
|
||||
_mi_random_split(&bheap->random, &heap->random);
|
||||
heap->cookie = _mi_heap_random_next(heap) | 1;
|
||||
heap->cookie = _mi_heap_random_next(heap) | 1;
|
||||
heap->keys[0] = _mi_heap_random_next(heap);
|
||||
heap->keys[1] = _mi_heap_random_next(heap);
|
||||
heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe
|
||||
|
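A usage sketch of the arena-bound heap above, assuming the public `mi_reserve_os_memory_ex` and `mi_heap_new_in_arena` declarations from this diff (sizes and flags are arbitrary):

    #include <mimalloc.h>
    #include <stdbool.h>
    #include <stdio.h>

    int main(void) {
      mi_arena_id_t arena_id;
      if (mi_reserve_os_memory_ex(64 * 1024 * 1024, true /*commit*/, false /*allow_large*/,
                                  true /*exclusive*/, &arena_id) != 0) {
        fprintf(stderr, "reserve failed\n");
        return 1;
      }
      mi_heap_t* heap = mi_heap_new_in_arena(arena_id);  // allocations from this heap stay in the arena
      void* p = mi_heap_malloc(heap, 1024);
      mi_free(p);
      mi_heap_delete(heap);
      return 0;
    }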
@ -232,7 +229,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) {
|
|||
return mi_heap_new_in_arena(_mi_arena_id_none());
|
||||
}
|
||||
|
||||
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) {
|
||||
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) {
|
||||
return _mi_arena_memid_is_suitable(memid, heap->arena_id);
|
||||
}
|
||||
|
||||
|
@ -365,7 +362,8 @@ void mi_heap_destroy(mi_heap_t* heap) {
|
|||
}
|
||||
}
|
||||
|
||||
void _mi_heap_destroy_all(void) {
|
||||
// forcefully destroy all heaps in the current thread
|
||||
void _mi_heap_unsafe_destroy_all(void) {
|
||||
mi_heap_t* bheap = mi_heap_get_backing();
|
||||
mi_heap_t* curr = bheap->tld->heaps;
|
||||
while (curr != NULL) {
|
||||
|
67 src/init.c
@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
|
||||
// Empty page used to initialize the small free pages array
|
||||
const mi_page_t _mi_page_empty = {
|
||||
0, false, false, false, false,
|
||||
0, false, false, false,
|
||||
0, // capacity
|
||||
0, // reserved capacity
|
||||
{ 0 }, // flags
|
||||
|
@ -37,6 +37,7 @@ const mi_page_t _mi_page_empty = {
|
|||
|
||||
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
|
||||
|
||||
#if (MI_SMALL_WSIZE_MAX==128)
|
||||
#if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8)
|
||||
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
|
||||
#elif (MI_PADDING>0)
|
||||
|
@ -44,7 +45,9 @@ const mi_page_t _mi_page_empty = {
|
|||
#else
|
||||
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() }
|
||||
#endif
|
||||
|
||||
#else
|
||||
#error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX"
|
||||
#endif
|
||||
|
||||
// Empty page queues for every bin
|
||||
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
|
||||
|
@ -79,8 +82,9 @@ const mi_page_t _mi_page_empty = {
|
|||
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
|
||||
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
|
||||
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
|
||||
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
|
||||
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
|
||||
MI_STAT_COUNT_NULL(), \
|
||||
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
|
||||
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
|
||||
MI_STAT_COUNT_END_NULL()
|
||||
|
||||
|
||||
|
@ -199,6 +203,7 @@ mi_heap_t* _mi_heap_main_get(void) {
|
|||
typedef struct mi_thread_data_s {
|
||||
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
|
||||
mi_tld_t tld;
|
||||
mi_memid_t memid;
|
||||
} mi_thread_data_t;
|
||||
|
||||
|
||||
|
@ -207,30 +212,44 @@ typedef struct mi_thread_data_s {
|
|||
// destroy many OS threads, this may causes too much overhead
|
||||
// per thread so we maintain a small cache of recently freed metadata.
|
||||
|
||||
#define TD_CACHE_SIZE (8)
|
||||
#define TD_CACHE_SIZE (16)
|
||||
static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];
|
||||
|
||||
static mi_thread_data_t* mi_thread_data_alloc(void) {
|
||||
static mi_thread_data_t* mi_thread_data_zalloc(void) {
|
||||
// try to find thread metadata in the cache
|
||||
mi_thread_data_t* td;
|
||||
bool is_zero = false;
|
||||
mi_thread_data_t* td = NULL;
|
||||
for (int i = 0; i < TD_CACHE_SIZE; i++) {
|
||||
td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
|
||||
if (td != NULL) {
|
||||
// found cached allocation, try use it
|
||||
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
|
||||
if (td != NULL) {
|
||||
return td;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// if that fails, allocate directly from the OS
|
||||
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
|
||||
|
||||
// if that fails, allocate as meta data
|
||||
if (td == NULL) {
|
||||
// if this fails, try once more. (issue #257)
|
||||
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
|
||||
mi_memid_t memid;
|
||||
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main);
|
||||
if (td == NULL) {
|
||||
// really out of memory
|
||||
_mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
|
||||
// if this fails, try once more. (issue #257)
|
||||
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main);
|
||||
if (td == NULL) {
|
||||
// really out of memory
|
||||
_mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
|
||||
}
|
||||
}
|
||||
if (td != NULL) {
|
||||
td->memid = memid;
|
||||
is_zero = memid.initially_zero;
|
||||
}
|
||||
}
|
||||
|
||||
if (td != NULL && !is_zero) {
|
||||
_mi_memzero_aligned(td, sizeof(*td));
|
||||
}
|
||||
return td;
|
||||
}
|
||||
|
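The thread-metadata cache above is a fixed array of atomic slots: take with an exchange, return with a CAS on an empty slot. A standalone sketch of that pattern with C11 atomics (names invented; not the mimalloc code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdlib.h>

    #define CACHE_SIZE 16
    static _Atomic(void*) cache[CACHE_SIZE];

    static void* cache_take(void) {
      for (int i = 0; i < CACHE_SIZE; i++) {
        if (atomic_load_explicit(&cache[i], memory_order_relaxed) != NULL) {
          void* p = atomic_exchange(&cache[i], NULL);   // claim the slot
          if (p != NULL) return p;
        }
      }
      return NULL;                                      // caller falls back to a fresh allocation
    }

    static bool cache_put(void* p) {
      for (int i = 0; i < CACHE_SIZE; i++) {
        void* expected = NULL;
        if (atomic_compare_exchange_strong(&cache[i], &expected, p)) return true;
      }
      return false;                                     // cache is full; caller frees directly
    }

    int main(void) {
      void* p = malloc(64);
      if (!cache_put(p)) { free(p); }
      void* q = cache_take();                           // returns the cached block (or NULL)
      free(q);
      return 0;
    }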
@ -247,17 +266,17 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
|
|||
}
|
||||
}
|
||||
// if that fails, just free it directly
|
||||
_mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main);
|
||||
_mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main);
|
||||
}
|
||||
|
||||
static void mi_thread_data_collect(void) {
|
||||
void _mi_thread_data_collect(void) {
|
||||
// free all thread metadata from the cache
|
||||
for (int i = 0; i < TD_CACHE_SIZE; i++) {
|
||||
mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
|
||||
if (td != NULL) {
|
||||
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
|
||||
if (td != NULL) {
|
||||
_mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main );
|
||||
_mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -275,10 +294,9 @@ static bool _mi_heap_init(void) {
|
|||
}
|
||||
else {
|
||||
// use `_mi_os_alloc` to allocate directly from the OS
|
||||
mi_thread_data_t* td = mi_thread_data_alloc();
|
||||
mi_thread_data_t* td = mi_thread_data_zalloc();
|
||||
if (td == NULL) return false;
|
||||
|
||||
// OS allocated so already zero initialized
|
||||
mi_tld_t* tld = &td->tld;
|
||||
mi_heap_t* heap = &td->heap;
|
||||
_mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
|
||||
|
@ -340,7 +358,6 @@ static bool _mi_heap_done(mi_heap_t* heap) {
|
|||
mi_thread_data_free((mi_thread_data_t*)heap);
|
||||
}
|
||||
else {
|
||||
mi_thread_data_collect(); // free cached thread metadata
|
||||
#if 0
|
||||
// never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
|
||||
// there may still be delete/free calls after the mi_fls_done is called. Issue #207
|
||||
|
@ -548,6 +565,9 @@ static void mi_detect_cpu_features(void) {
|
|||
void mi_process_init(void) mi_attr_noexcept {
|
||||
// ensure we are called once
|
||||
static mi_atomic_once_t process_init;
|
||||
#if _MSC_VER < 1920
|
||||
mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main
|
||||
#endif
|
||||
if (!mi_atomic_once(&process_init)) return;
|
||||
_mi_process_is_initialized = true;
|
||||
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
|
||||
|
@ -606,7 +626,7 @@ static void mi_cdecl mi_process_done(void) {
|
|||
_mi_prim_thread_done_auto_done();
|
||||
|
||||
#ifndef MI_SKIP_COLLECT_ON_EXIT
|
||||
#if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
|
||||
#if (MI_DEBUG || !defined(MI_SHARED_LIB))
|
||||
// free all memory if possible on process exit. This is not needed for a stand-alone process
|
||||
// but should be done if mimalloc is statically linked into another shared library which
|
||||
// is repeatedly loaded/unloaded, see issue #281.
|
||||
|
@ -618,8 +638,9 @@ static void mi_cdecl mi_process_done(void) {
|
|||
// since after process_done there might still be other code running that calls `free` (like at_exit routines,
|
||||
// or C-runtime termination code.
|
||||
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
|
||||
_mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!)
|
||||
_mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments
|
||||
mi_collect(true /* force */);
|
||||
_mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!)
|
||||
_mi_arena_unsafe_destroy_all(& _mi_heap_main_get()->tld->stats);
|
||||
}
|
||||
|
||||
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
|
||||
src/options.c
@ -41,7 +41,7 @@ typedef struct mi_option_desc_s {
|
|||
mi_init_t init; // is it initialized yet? (from the environment)
|
||||
mi_option_t option; // for debugging: the option index should match the option
|
||||
const char* name; // option name without `mimalloc_` prefix
|
||||
const char* legacy_name; // potential legacy v1.x option name
|
||||
const char* legacy_name; // potential legacy option name
|
||||
} mi_option_desc_t;
|
||||
|
||||
#define MI_OPTION(opt) mi_option_##opt, #opt, NULL
|
||||
|
@ -58,36 +58,38 @@ static mi_option_desc_t options[_mi_option_last] =
|
|||
{ 0, UNINIT, MI_OPTION(show_stats) },
|
||||
{ 0, UNINIT, MI_OPTION(verbose) },
|
||||
|
||||
// Some of the following options are experimental and not all combinations are valid. Use with care.
|
||||
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`)
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) },
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_reset_decommits) },
|
||||
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
|
||||
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
|
||||
{ -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
|
||||
// the following options are experimental and not all combinations make sense.
|
||||
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
|
||||
{ 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux)
|
||||
{ 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit)
|
||||
{ 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
|
||||
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
|
||||
{-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
|
||||
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
|
||||
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
|
||||
{ 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_segment_reset) },
|
||||
#if defined(__NetBSD__)
|
||||
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
|
||||
#elif defined(_WIN32)
|
||||
{ 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free
|
||||
{ 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates
|
||||
{ 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit)
|
||||
#if defined(__NetBSD__)
|
||||
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
|
||||
#else
|
||||
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
|
||||
#endif
|
||||
{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
|
||||
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
|
||||
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
|
||||
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
|
||||
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
|
||||
{ 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
|
||||
{ 8, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. number of segment reclaims from the abandoned segments per try.
|
||||
{ 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
|
||||
#if (MI_INTPTR_SIZE>4)
|
||||
{ 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time
|
||||
#else
|
||||
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
|
||||
{ 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) },
|
||||
#endif
|
||||
{ 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds
|
||||
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
|
||||
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
|
||||
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
|
||||
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
|
||||
{ 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
|
||||
{ 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try.
|
||||
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds)
|
||||
{ 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments
|
||||
{ 1, UNINIT, MI_OPTION(decommit_extend_delay) },
|
||||
{ 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
|
||||
{ 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
|
||||
{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
|
||||
};
|
||||
|
||||
static void mi_option_init(mi_option_desc_t* desc);
|
||||
|
@ -125,6 +127,12 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma
|
|||
return (x < min ? min : (x > max ? max : x));
|
||||
}
|
||||
|
||||
mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) {
|
||||
mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_arena_reserve);
|
||||
long x = mi_option_get(option);
|
||||
return (x < 0 ? 0 : (size_t)x * MI_KiB);
|
||||
}
|
||||
|
||||
void mi_option_set(mi_option_t option, long value) {
|
||||
mi_assert(option >= 0 && option < _mi_option_last);
|
||||
if (option < 0 || option >= _mi_option_last) return;
|
||||
|
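A usage sketch for the size-valued options, assuming the renamed option names in this diff (`mi_option_arena_reserve`, `mi_option_purge_delay`) and the new `mi_option_get_size` accessor; these options are interpreted in KiB.

    #include <mimalloc.h>
    #include <stdio.h>

    int main(void) {
      mi_option_set(mi_option_arena_reserve, 512 * 1024);       // value is in KiB, so 512 MiB
      printf("arena_reserve = %zu bytes\n", mi_option_get_size(mi_option_arena_reserve));
      printf("purge_delay   = %ld ms\n", mi_option_get(mi_option_purge_delay));
      return 0;
    }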
@ -241,7 +249,7 @@ void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept {
|
|||
}
|
||||
|
||||
// add stderr to the delayed output after the module is loaded
|
||||
static void mi_add_stderr_output() {
|
||||
static void mi_add_stderr_output(void) {
|
||||
mi_assert_internal(mi_out_default == NULL);
|
||||
mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr
|
||||
mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output
|
||||
|
@ -496,27 +504,27 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
|
|||
|
||||
static void mi_option_init(mi_option_desc_t* desc) {
|
||||
// Read option value from the environment
|
||||
char s[64+1];
|
||||
char s[64 + 1];
|
||||
char buf[64+1];
|
||||
_mi_strlcpy(buf, "mimalloc_", sizeof(buf));
|
||||
_mi_strlcat(buf, desc->name, sizeof(buf));
|
||||
bool found = mi_getenv(buf,s,sizeof(s));
|
||||
bool found = mi_getenv(buf, s, sizeof(s));
|
||||
if (!found && desc->legacy_name != NULL) {
|
||||
_mi_strlcpy(buf, "mimalloc_", sizeof(buf));
|
||||
_mi_strlcat(buf, desc->legacy_name, sizeof(buf));
|
||||
found = mi_getenv(buf,s,sizeof(s));
|
||||
found = mi_getenv(buf, s, sizeof(s));
|
||||
if (found) {
|
||||
_mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name );
|
||||
}
|
||||
_mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name);
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
size_t len = _mi_strnlen(s,sizeof(buf)-1);
|
||||
size_t len = _mi_strnlen(s, sizeof(buf) - 1);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
buf[i] = _mi_toupper(s[i]);
|
||||
}
|
||||
buf[len] = 0;
|
||||
if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) {
|
||||
if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) {
|
||||
desc->value = 1;
|
||||
desc->init = INITIALIZED;
|
||||
}
|
||||
|
@ -527,7 +535,7 @@ static void mi_option_init(mi_option_desc_t* desc) {
|
|||
else {
|
||||
char* end = buf;
|
||||
long value = strtol(buf, &end, 10);
|
||||
if (desc->option == mi_option_reserve_os_memory) {
|
||||
if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_arena_reserve) {
|
||||
// this option is interpreted in KiB to prevent overflow of `long`
|
||||
if (*end == 'K') { end++; }
|
||||
else if (*end == 'M') { value *= MI_KiB; end++; }
|
||||
|
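A standalone sketch of the suffix parsing above: values stay in KiB so a `long` cannot overflow, and only the `K`/`M` suffixes shown in this hunk are handled (the helper name is invented):

    #include <stdio.h>
    #include <stdlib.h>

    static long parse_kib(const char* s) {
      char* end;
      long value = strtol(s, &end, 10);
      if (*end == 'K')      { end++; }
      else if (*end == 'M') { value *= 1024; end++; }
      return value;                             // interpreted as KiB by the caller
    }

    int main(void) {
      printf("%ld KiB\n", parse_kib("512M"));   // prints 524288
      return 0;
    }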
@ -547,11 +555,11 @@ static void mi_option_init(mi_option_desc_t* desc) {
|
|||
// if the 'mimalloc_verbose' env var has a bogus value we'd never know
|
||||
// (since the value defaults to 'off') so in that case briefly enable verbose
|
||||
desc->value = 1;
|
||||
_mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name );
|
||||
_mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name);
|
||||
desc->value = 0;
|
||||
}
|
||||
else {
|
||||
_mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name );
|
||||
_mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
311 src/os.c
@ -21,13 +21,19 @@ static mi_os_mem_config_t mi_os_mem_config = {
|
|||
0, // large page size (usually 2MiB)
|
||||
4096, // allocation granularity
|
||||
true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
|
||||
false // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
|
||||
false, // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
|
||||
true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
|
||||
};
|
||||
|
||||
bool _mi_os_has_overcommit(void) {
|
||||
return mi_os_mem_config.has_overcommit;
|
||||
}
|
||||
|
||||
bool _mi_os_has_virtual_reserve(void) {
|
||||
return mi_os_mem_config.has_virtual_reserve;
|
||||
}
|
||||
|
||||
|
||||
// OS (small) page size
|
||||
size_t _mi_os_page_size(void) {
|
||||
return mi_os_mem_config.page_size;
|
||||
|
@ -40,7 +46,7 @@ size_t _mi_os_large_page_size(void) {
|
|||
|
||||
bool _mi_os_use_large_page(size_t size, size_t alignment) {
|
||||
// if we have access, check the size and alignment requirements
|
||||
if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false;
|
||||
if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
|
||||
return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0);
|
||||
}
|
||||
|
||||
|
@ -131,7 +137,9 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
|
|||
Free memory
|
||||
-------------------------------------------------------------- */
|
||||
|
||||
static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) {
|
||||
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);
|
||||
|
||||
static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
|
||||
MI_UNUSED(tld_stats);
|
||||
mi_assert_internal((size % _mi_os_page_size()) == 0);
|
||||
if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
|
||||
|
@ -140,18 +148,38 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
|
|||
_mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
|
||||
}
|
||||
mi_stats_t* stats = &_mi_stats_main;
|
||||
if (was_committed) { _mi_stat_decrease(&stats->committed, size); }
|
||||
if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
|
||||
_mi_stat_decrease(&stats->reserved, size);
|
||||
}
|
||||
|
||||
|
||||
void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) {
|
||||
const size_t csize = _mi_os_good_alloc_size(size);
|
||||
mi_os_mem_free(addr,csize,was_committed,tld_stats);
|
||||
void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
|
||||
if (mi_memkind_is_os(memid.memkind)) {
|
||||
size_t csize = _mi_os_good_alloc_size(size);
|
||||
void* base = addr;
|
||||
// different base? (due to alignment)
|
||||
if (memid.mem.os.base != NULL) {
|
||||
mi_assert(memid.mem.os.base <= addr);
|
||||
mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
|
||||
base = memid.mem.os.base;
|
||||
csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base);
|
||||
}
|
||||
// free it
|
||||
if (memid.memkind == MI_MEM_OS_HUGE) {
|
||||
mi_assert(memid.is_pinned);
|
||||
mi_os_free_huge_os_pages(base, csize, tld_stats);
|
||||
}
|
||||
else {
|
||||
mi_os_prim_free(base, csize, still_committed, tld_stats);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// nothing to do
|
||||
mi_assert(memid.memkind < MI_MEM_OS);
|
||||
}
|
||||
}
|
||||
|
||||
void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) {
|
||||
_mi_os_free_ex(p, size, true, tld_stats);
|
||||
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
|
||||
_mi_os_free_ex(p, size, true, memid, tld_stats);
|
||||
}
|
||||
|
||||
|
||||
|
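Freeing now recovers the original OS base from the memid when the returned pointer was offset for alignment. A simplified standalone sketch of that adjustment (the `os_memid_t` here is a stand-in, not the mimalloc `mi_memid_t`):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { void* base; size_t alignment; } os_memid_t;   // simplified stand-in

    static void free_with_base(void* addr, size_t size, os_memid_t memid) {
      void*  base  = addr;
      size_t csize = size;
      if (memid.base != NULL && memid.base != addr) {
        csize += (size_t)((uint8_t*)addr - (uint8_t*)memid.base);  // include the leading slack
        base   = memid.base;
      }
      printf("os-free base=%p size=%zu\n", base, csize);           // stand-in for munmap/VirtualFree
    }

    int main(void) {
      static uint8_t region[4096];
      os_memid_t id = { region, 64 };
      free_with_base(region + 48, 1000, id);                       // frees from the original base
      return 0;
    }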
@ -160,31 +188,31 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) {
|
|||
-------------------------------------------------------------- */
|
||||
|
||||
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
|
||||
static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) {
|
||||
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
|
||||
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
|
||||
mi_assert_internal(is_zero != NULL);
|
||||
mi_assert_internal(is_large != NULL);
|
||||
if (size == 0) return NULL;
|
||||
if (!commit) allow_large = false;
|
||||
if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning
|
||||
if (!commit) { allow_large = false; }
|
||||
if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning
|
||||
|
||||
*is_zero = false;
|
||||
void* p = NULL;
|
||||
int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p);
|
||||
int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
|
||||
if (err != 0) {
|
||||
_mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
|
||||
}
|
||||
/*
|
||||
if (commit && allow_large) {
|
||||
p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment);
|
||||
if (p != NULL) {
|
||||
*is_large = true;
|
||||
return p;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
mi_stat_counter_increase(stats->mmap_calls, 1);
|
||||
if (p != NULL) {
|
||||
_mi_stat_increase(&stats->reserved, size);
|
||||
if (commit) { _mi_stat_increase(&stats->committed, size); }
|
||||
if (commit) {
|
||||
_mi_stat_increase(&stats->committed, size);
|
||||
// seems needed for asan (or `mimalloc-test-api` fails)
|
||||
#ifdef MI_TRACK_ASAN
|
||||
if (*is_zero) { mi_track_mem_defined(p,size); }
|
||||
else { mi_track_mem_undefined(p,size); }
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
@ -192,33 +220,40 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
|
|||
|
||||
// Primitive aligned allocation from the OS.
|
||||
// This function guarantees the allocated memory is aligned.
|
||||
static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) {
|
||||
static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) {
|
||||
mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
|
||||
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
|
||||
mi_assert_internal(is_large != NULL);
|
||||
mi_assert_internal(is_zero != NULL);
|
||||
mi_assert_internal(base != NULL);
|
||||
if (!commit) allow_large = false;
|
||||
if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
|
||||
size = _mi_align_up(size, _mi_os_page_size());
|
||||
|
||||
// try first with a hint (this will be aligned directly on Win 10+ or BSD)
|
||||
void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats);
|
||||
void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
|
||||
if (p == NULL) return NULL;
|
||||
|
||||
// if not aligned, free it, overallocate, and unmap around it
|
||||
if (((uintptr_t)p % alignment != 0)) {
|
||||
mi_os_mem_free(p, size, commit, stats);
|
||||
// aligned already?
|
||||
if (((uintptr_t)p % alignment) == 0) {
|
||||
*base = p;
|
||||
}
|
||||
else {
|
||||
// if not aligned, free it, overallocate, and unmap around it
|
||||
_mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
|
||||
mi_os_prim_free(p, size, commit, stats);
|
||||
if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
|
||||
const size_t over_size = size + alignment;
|
||||
|
||||
if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block
|
||||
// over-allocate uncommitted (virtual) memory
|
||||
p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats);
|
||||
p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
|
||||
if (p == NULL) return NULL;
|
||||
|
||||
|
||||
// set p to the aligned part in the full region
|
||||
// note: this is dangerous on Windows as VirtualFree needs the actual region pointer
|
||||
// but in mi_os_mem_free we handle this (hopefully exceptional) situation.
|
||||
// note: this is dangerous on Windows as VirtualFree needs the actual base pointer
|
||||
// this is handled though by having the `base` field in the memid's
|
||||
*base = p; // remember the base
|
||||
p = mi_align_up_ptr(p, alignment);
|
||||
|
||||
// explicitly commit only the aligned part
|
||||
|
@ -228,22 +263,24 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
|
|||
}
|
||||
else { // mmap can free inside an allocation
|
||||
// overallocate...
|
||||
p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats);
|
||||
p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
|
||||
if (p == NULL) return NULL;
|
||||
|
||||
// and selectively unmap parts around the over-allocated area. (noop on sbrk)
|
||||
void* aligned_p = mi_align_up_ptr(p, alignment);
|
||||
size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
|
||||
size_t mid_size = _mi_align_up(size, _mi_os_page_size());
|
||||
size_t post_size = over_size - pre_size - mid_size;
|
||||
mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size);
|
||||
if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats);
|
||||
if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats);
|
||||
if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit, stats); }
|
||||
if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); }
|
||||
// we can return the aligned pointer on `mmap` (and sbrk) systems
|
||||
p = aligned_p;
|
||||
*base = aligned_p; // since we freed the pre part, `*base == p`.
|
||||
}
|
||||
}
|
||||
|
||||
mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0));
|
||||
mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0));
|
||||
return p;
|
||||
}
|
||||
|
||||
|
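On mmap-style systems the aligned path above over-allocates by `alignment` and unmaps the unaligned head and tail. A standalone sketch of that technique, assuming POSIX `mmap`/`munmap` on Linux with minimal error handling:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>

    static void* alloc_aligned(size_t size, size_t alignment) {    // both multiples of the page size
      size_t over = size + alignment;
      uint8_t* p = mmap(NULL, over, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) return NULL;
      uint8_t* aligned = (uint8_t*)(((uintptr_t)p + alignment - 1) & ~(uintptr_t)(alignment - 1));
      size_t pre  = (size_t)(aligned - p);
      size_t post = over - pre - size;
      if (pre  > 0) munmap(p, pre);                 // unmap the unaligned head
      if (post > 0) munmap(aligned + size, post);   // unmap the tail
      return aligned;                               // can later be freed with munmap(aligned, size)
    }

    int main(void) {
      void* p = alloc_aligned(1 << 20, 2 << 20);    // 1 MiB block aligned to 2 MiB
      if (p == NULL) return 1;
      printf("%p aligned: %d\n", p, (int)(((uintptr_t)p % (2 << 20)) == 0));
      return 0;
    }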
@ -252,28 +289,40 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
|
|||
OS API: alloc and alloc_aligned
|
||||
----------------------------------------------------------- */
|
||||
|
||||
void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) {
|
||||
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
|
||||
MI_UNUSED(tld_stats);
|
||||
*memid = _mi_memid_none();
|
||||
mi_stats_t* stats = &_mi_stats_main;
|
||||
if (size == 0) return NULL;
|
||||
size = _mi_os_good_alloc_size(size);
|
||||
bool is_large = false;
|
||||
return mi_os_mem_alloc(size, 0, true, false, &is_large, stats);
|
||||
bool os_is_large = false;
|
||||
bool os_is_zero = false;
|
||||
void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
|
||||
if (p != NULL) {
|
||||
*memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats)
|
||||
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats)
|
||||
{
|
||||
MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
|
||||
MI_UNUSED(tld_stats);
|
||||
*memid = _mi_memid_none();
|
||||
if (size == 0) return NULL;
|
||||
size = _mi_os_good_alloc_size(size);
|
||||
alignment = _mi_align_up(alignment, _mi_os_page_size());
|
||||
bool allow_large = false;
|
||||
if (large != NULL) {
|
||||
allow_large = *large;
|
||||
*large = false;
|
||||
|
||||
bool os_is_large = false;
|
||||
bool os_is_zero = false;
|
||||
void* os_base = NULL;
|
||||
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ );
|
||||
if (p != NULL) {
|
||||
*memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
|
||||
memid->mem.os.base = os_base;
|
||||
memid->mem.os.alignment = alignment;
|
||||
}
|
||||
return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ );
|
||||
return p;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
|
@ -284,22 +333,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar
  to use the actual start of the memory region.
----------------------------------------------------------- */

void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) {
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) {
  mi_assert(offset <= MI_SEGMENT_SIZE);
  mi_assert(offset <= size);
  mi_assert((alignment % _mi_os_page_size()) == 0);
  *memid = _mi_memid_none();
  if (offset > MI_SEGMENT_SIZE) return NULL;
  if (offset == 0) {
    // regular aligned allocation
    return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats);
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats);
  }
  else {
    // overallocate to align at an offset
    const size_t extra = _mi_align_up(offset, alignment) - offset;
    const size_t oversize = size + extra;
    void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats);
    void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats);
    if (start == NULL) return NULL;
    void* p = (uint8_t*)start + extra;

    void* const p = (uint8_t*)start + extra;
    mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
    // decommit the overallocation at the start
    if (commit && extra > _mi_os_page_size()) {
@ -309,14 +360,6 @@ void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset,
  }
}

void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) {
  mi_assert(align_offset <= MI_SEGMENT_SIZE);
  const size_t extra = _mi_align_up(align_offset, alignment) - align_offset;
  void* start = (uint8_t*)p - extra;
  _mi_os_free_ex(start, size + extra, was_committed, tld_stats);
}

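Note: the offset-aligned path over-allocates by `extra = _mi_align_up(offset, alignment) - offset` so that `p + offset` lands on an alignment boundary, and the free path recovers the original base as `p - extra`. A standalone worked example of that arithmetic (values are illustrative, not taken from this change):

#include <assert.h>
#include <stdint.h>

// mirrors _mi_align_up for the purpose of this example
static size_t align_up(size_t n, size_t a) { return (n + a - 1) / a * a; }

int main(void) {
  const size_t alignment = 0x400000;          // 4 MiB
  const size_t offset    = 0x4000;            // 16 KiB
  const size_t extra     = align_up(offset, alignment) - offset;   // 0x3FC000
  const uintptr_t start  = 0x7f0000000000;    // pretend (aligned) allocation base
  const uintptr_t p      = start + extra;
  assert((p + offset) % alignment == 0);      // p + offset lands on the alignment boundary
  assert(p - extra == start);                 // _mi_os_free_aligned recovers `start`
  return 0;
}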
/* -----------------------------------------------------------
  OS memory API: reset, commit, decommit, protect, unprotect.
----------------------------------------------------------- */

@ -345,63 +388,75 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t*
  return mi_os_page_align_areax(true, addr, size, newsize);
}

// Commit/Decommit memory.
// Usually commit is aligned liberal, while decommit is aligned conservative.
// (but not for the reset version where we want commit to be conservative as well)
static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) {
  // page align in the range, commit liberally, decommit conservative
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  if (is_zero != NULL) { *is_zero = false; }
  size_t csize;
  void* start = mi_os_page_align_areax(conservative, addr, size, &csize);
  if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr))
  if (commit) {
    _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit
    _mi_stat_counter_increase(&stats->commit_calls, 1);
  }
  else {
    _mi_stat_decrease(&stats->committed, size);
  }
  _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit
  _mi_stat_counter_increase(&stats->commit_calls, 1);

  int err = _mi_prim_commit(start, csize, commit);
  // page align range
  size_t csize;
  void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
  if (csize == 0) return true;

  // commit
  bool os_is_zero = false;
  int err = _mi_prim_commit(start, csize, &os_is_zero);
  if (err != 0) {
    _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize);
    _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
    return false;
  }
  if (os_is_zero && is_zero != NULL) {
    *is_zero = true;
    mi_assert_expensive(mi_mem_is_zero(start, csize));
  }
  // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
  #ifdef MI_TRACK_ASAN
  if (os_is_zero) { mi_track_mem_defined(start,csize); }
  else { mi_track_mem_undefined(start,csize); }
  #endif
  return true;
}

static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_assert_internal(needs_recommit!=NULL);
  _mi_stat_decrease(&stats->committed, size);

  // page align
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;

  // decommit
  *needs_recommit = true;
  int err = _mi_prim_decommit(start,csize,needs_recommit);
  if (err != 0) {
    _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  mi_assert_internal(err == 0);
  return (err == 0);
}

bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats);
}

bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  bool is_zero;
  return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats);
  bool needs_recommit;
  return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats);
}

/*
static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) {
  return mi_os_commitx(addr, size, true, true // conservative
                      , is_zero, stats);
}
*/

// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) {
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
  // page align conservatively within the range
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)
  if (reset) _mi_stat_increase(&stats->reset, csize);
  else _mi_stat_decrease(&stats->reset, csize);
  if (!reset) return true; // nothing to do on unreset!
  _mi_stat_increase(&stats->reset, csize);
  _mi_stat_counter_increase(&stats->reset_calls, 1);

  #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN
  memset(start, 0, csize); // pretend it is eagerly reset
@ -414,24 +469,35 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
  return (err == 0);
}

// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  return mi_os_resetx(addr, size, true, stats);

// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
{
  if (mi_option_get(mi_option_purge_delay) < 0) return false;  // is purging allowed?
  _mi_stat_counter_increase(&stats->purge_calls, 1);
  _mi_stat_increase(&stats->purged, size);

  if (mi_option_is_enabled(mi_option_purge_decommits) &&   // should decommit?
      !_mi_preloading())                                   // don't decommit during preloading (unsafe)
  {
    bool needs_recommit = true;
    mi_os_decommit_ex(p, size, &needs_recommit, stats);
    return needs_recommit;
  }
  else {
    if (allow_reset) {  // this can sometimes be not allowed if the range is not fully committed
      _mi_os_reset(p, size, stats);
    }
    return false;  // needs no recommit
  }
}

/*
bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  *is_zero = false;
  return mi_os_resetx(addr, size, false, stats);
// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) {
  return _mi_os_purge_ex(p, size, true, stats);
}
*/
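Note: `_mi_os_purge_ex` folds the old reset/unreset pair into a single purge: with `mi_option_purge_decommits` enabled (the default) it decommits and returns true so the caller knows to recommit, otherwise it only resets. A hedged sketch of a caller (the `area` and `area_size` names are made up for illustration):

  bool needs_recommit = _mi_os_purge_ex(area, area_size, true /* allow reset */, &_mi_stats_main);
  if (needs_recommit) {
    // the range was decommitted; commit it again before reuse
    bool is_zero = false;
    _mi_os_commit(area, area_size, &is_zero, &_mi_stats_main);
  }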

// Protect a region in memory to be not accessible.
static bool mi_os_protectx(void* addr, size_t size, bool protect) {
@ -506,7 +572,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
|
|||
#endif
|
||||
|
||||
// Allocate MI_SEGMENT_SIZE aligned huge pages
|
||||
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) {
|
||||
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
|
||||
*memid = _mi_memid_none();
|
||||
if (psize != NULL) *psize = 0;
|
||||
if (pages_reserved != NULL) *pages_reserved = 0;
|
||||
size_t size = 0;
|
||||
|
@ -518,11 +585,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
|
|||
// or to at least allocate as many as available on the system.
|
||||
mi_msecs_t start_t = _mi_clock_start();
|
||||
size_t page = 0;
|
||||
bool all_zero = true;
|
||||
while (page < pages) {
|
||||
// allocate a page
|
||||
bool is_zero = false;
|
||||
void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
|
||||
void* p = NULL;
|
||||
int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p);
|
||||
int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
|
||||
if (!is_zero) { all_zero = false; }
|
||||
if (err != 0) {
|
||||
_mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
|
||||
break;
|
||||
|
@ -533,7 +603,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
|
|||
// no success, issue a warning and break
|
||||
if (p != NULL) {
|
||||
_mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
|
||||
_mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main);
|
||||
mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -561,16 +631,25 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
  mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
  if (pages_reserved != NULL) { *pages_reserved = page; }
  if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
  if (page != 0) {
    mi_assert(start != NULL);
    *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */);
    memid->memkind = MI_MEM_OS_HUGE;
    mi_assert(memid->is_pinned);
    #ifdef MI_TRACK_ASAN
    if (all_zero) { mi_track_mem_defined(start,size); }
    #endif
  }
  return (page == 0 ? NULL : start);
}

// free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
  if (p==NULL || size==0) return;
  uint8_t* base = (uint8_t*)p;
  while (size >= MI_HUGE_OS_PAGE_SIZE) {
    _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats);
    mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats);
    size -= MI_HUGE_OS_PAGE_SIZE;
    base += MI_HUGE_OS_PAGE_SIZE;
  }
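Note: huge OS pages are reserved one 1 GiB page at a time, which is why freeing walks the range in MI_HUGE_OS_PAGE_SIZE steps above. A hedged usage sketch of the reservation entry point in this diff (the page count, NUMA node, and timeout are arbitrary illustration values):

  size_t pages_reserved = 0;
  size_t psize = 0;
  mi_memid_t memid;
  void* start = _mi_os_alloc_huge_os_pages(4, 0 /* numa node */, 2000 /* max msecs */, &pages_reserved, &psize, &memid);
  if (start != NULL) {
    // psize == pages_reserved * MI_HUGE_OS_PAGE_SIZE and memid.memkind == MI_MEM_OS_HUGE
  }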
src/page.c
@ -66,6 +66,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
|
|||
if (p < start || p >= end) return false;
|
||||
p = mi_block_next(page, p);
|
||||
}
|
||||
#if MI_DEBUG>3 // generally too expensive to check this
|
||||
if (page->free_is_zero) {
|
||||
const size_t ubsize = mi_page_usable_block_size(page);
|
||||
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) {
|
||||
mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t)));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -84,7 +92,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
|
|||
mi_assert_internal(mi_page_list_is_valid(page,page->local_free));
|
||||
|
||||
#if MI_DEBUG>3 // generally too expensive to check this
|
||||
if (page->is_zero) {
|
||||
if (page->free_is_zero) {
|
||||
const size_t ubsize = mi_page_usable_block_size(page);
|
||||
for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
|
||||
mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t)));
|
||||
|
@ -221,7 +229,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
|
|||
// usual case
|
||||
page->free = page->local_free;
|
||||
page->local_free = NULL;
|
||||
page->is_zero = false;
|
||||
page->free_is_zero = false;
|
||||
}
|
||||
else if (force) {
|
||||
// append -- only on shutdown (force) as this is a linear operation
|
||||
|
@ -233,7 +241,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
|
|||
mi_block_set_next(page, tail, page->free);
|
||||
page->free = page->local_free;
|
||||
page->local_free = NULL;
|
||||
page->is_zero = false;
|
||||
page->free_is_zero = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -255,7 +263,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
|
|||
#if MI_HUGE_PAGE_ABANDON
|
||||
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
|
||||
#endif
|
||||
mi_assert_internal(!page->is_reset);
|
||||
|
||||
// TODO: push on full queue immediately if it is full?
|
||||
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
|
||||
mi_page_queue_push(heap, pq, page);
|
||||
|
@ -421,7 +429,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
|
|||
|
||||
// Retire parameters
|
||||
#define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX)
|
||||
#define MI_RETIRE_CYCLES (8)
|
||||
#define MI_RETIRE_CYCLES (16)
|
||||
|
||||
// Retire a page with no more used blocks
|
||||
// Important to not retire too quickly though as new
|
||||
|
@ -641,11 +649,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
|
|||
// enable the new free list
|
||||
page->capacity += (uint16_t)extend;
|
||||
mi_stat_increase(tld->stats.page_committed, extend * bsize);
|
||||
|
||||
// extension into zero initialized memory preserves the zero'd free list
|
||||
if (!page->is_zero_init) {
|
||||
page->is_zero = false;
|
||||
}
|
||||
mi_assert_expensive(mi_page_is_valid_init(page));
|
||||
}
|
||||
|
||||
|
@ -671,14 +674,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
|
|||
page->keys[0] = _mi_heap_random_next(heap);
|
||||
page->keys[1] = _mi_heap_random_next(heap);
|
||||
#endif
|
||||
#if MI_DEBUG > 0
|
||||
page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501
|
||||
#else
|
||||
page->is_zero = page->is_zero_init;
|
||||
page->free_is_zero = page->is_zero_init;
|
||||
#if MI_DEBUG>2
|
||||
if (page->is_zero_init) {
|
||||
mi_track_mem_defined(page_start, page_size);
|
||||
mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
mi_assert_internal(page->is_committed);
|
||||
mi_assert_internal(!page->is_reset);
|
||||
mi_assert_internal(page->capacity == 0);
|
||||
mi_assert_internal(page->free == NULL);
|
||||
mi_assert_internal(page->used == 0);
|
||||
|
|
|
@ -195,7 +195,7 @@ static malloc_introspection_t mi_introspect = {
|
|||
.log = &intro_log,
|
||||
.force_lock = &intro_force_lock,
|
||||
.force_unlock = &intro_force_unlock,
|
||||
#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)
|
||||
#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__)
|
||||
.statistics = &intro_statistics,
|
||||
.zone_locked = &intro_zone_locked,
|
||||
#endif
|
||||
|
@ -216,7 +216,7 @@ static malloc_zone_t mi_malloc_zone = {
|
|||
.batch_malloc = &zone_batch_malloc,
|
||||
.batch_free = &zone_batch_free,
|
||||
.introspect = &mi_introspect,
|
||||
#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)
|
||||
#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__)
|
||||
#if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
|
||||
.version = 10,
|
||||
#else
|
||||
|
|
|
@ -134,6 +134,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
|
|||
config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
|
||||
config->has_overcommit = unix_detect_overcommit();
|
||||
config->must_free_whole = false; // mmap can free in parts
|
||||
config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE)
|
||||
}
|
||||
|
||||
|
||||
|
@ -169,7 +170,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p
|
|||
p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
|
||||
if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
|
||||
int err = errno;
|
||||
_mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint);
|
||||
_mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr);
|
||||
}
|
||||
if (p!=MAP_FAILED) return p;
|
||||
// fall back to regular mmap
|
||||
|
@ -189,7 +190,11 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p
|
|||
if (hint != NULL) {
|
||||
p = mmap(hint, size, protect_flags, flags, fd, 0);
|
||||
if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
|
||||
#if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly?
|
||||
int err = 0;
|
||||
#else
|
||||
int err = errno;
|
||||
#endif
|
||||
_mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint);
|
||||
}
|
||||
if (p!=MAP_FAILED) return p;
|
||||
|
@ -204,28 +209,33 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int unix_mmap_fd(void) {
|
||||
#if defined(VM_MAKE_TAG)
|
||||
// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
|
||||
int os_tag = (int)mi_option_get(mi_option_os_tag);
|
||||
if (os_tag < 100 || os_tag > 255) { os_tag = 100; }
|
||||
return VM_MAKE_TAG(os_tag);
|
||||
#else
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
|
||||
void* p = NULL;
|
||||
#if !defined(MAP_ANONYMOUS)
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
#if !defined(MAP_NORESERVE)
|
||||
#define MAP_NORESERVE 0
|
||||
#endif
|
||||
void* p = NULL;
|
||||
const int fd = unix_mmap_fd();
|
||||
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
int fd = -1;
|
||||
if (_mi_os_has_overcommit()) {
|
||||
flags |= MAP_NORESERVE;
|
||||
}
|
||||
#if defined(PROT_MAX)
|
||||
protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
|
||||
#endif
|
||||
#if defined(VM_MAKE_TAG)
|
||||
// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
|
||||
int os_tag = (int)mi_option_get(mi_option_os_tag);
|
||||
if (os_tag < 100 || os_tag > 255) { os_tag = 100; }
|
||||
fd = VM_MAKE_TAG(os_tag);
|
||||
#endif
|
||||
// huge page allocation
|
||||
if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) {
|
||||
static _Atomic(size_t) large_page_try_ok; // = 0;
|
||||
|
@ -313,12 +323,13 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
|
|||
}
|
||||
|
||||
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
|
||||
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
|
||||
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
|
||||
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
|
||||
mi_assert_internal(commit || !allow_large);
|
||||
mi_assert_internal(try_alignment > 0);
|
||||
|
||||
int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
|
||||
*is_zero = true;
|
||||
int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
|
||||
*addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
|
||||
return (*addr != NULL ? 0 : errno);
|
||||
}
|
||||
|
@ -340,46 +351,46 @@ static void unix_mprotect_hint(int err) {
|
|||
#endif
|
||||
}
|
||||
|
||||
int _mi_prim_commit(void* start, size_t size, bool* is_zero) {
|
||||
// commit: ensure we can access the area
|
||||
// note: we may think that *is_zero can be true since the memory
|
||||
// was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but
|
||||
// we sometimes call commit on a range with still partially committed
|
||||
// memory and `mprotect` does not zero the range.
|
||||
*is_zero = false;
|
||||
int err = mprotect(start, size, (PROT_READ | PROT_WRITE));
|
||||
if (err != 0) {
|
||||
err = errno;
|
||||
unix_mprotect_hint(err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int _mi_prim_commit(void* start, size_t size, bool commit) {
|
||||
/*
|
||||
#if 0 && defined(MAP_FIXED) && !defined(__APPLE__)
|
||||
// Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?)
|
||||
if (commit) {
|
||||
// commit: just change the protection
|
||||
err = mprotect(start, csize, (PROT_READ | PROT_WRITE));
|
||||
if (err != 0) { err = errno; }
|
||||
}
|
||||
else {
|
||||
// decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss)
|
||||
const int fd = mi_unix_mmap_fd();
|
||||
void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0);
|
||||
if (p != start) { err = errno; }
|
||||
}
|
||||
int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
  int err = 0;
  // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
  err = unix_madvise(start, size, MADV_DONTNEED);
  #if !MI_DEBUG && !MI_SECURE
  *needs_recommit = false;
  #else
  *needs_recommit = true;
  mprotect(start, size, PROT_NONE);
  #endif
  /*
  // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss)
  *needs_recommit = true;
  const int fd = unix_mmap_fd();
  void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0);
  if (p != start) { err = errno; }
  */
  int err = 0;
  if (commit) {
    // commit: ensure we can access the area
    err = mprotect(start, size, (PROT_READ | PROT_WRITE));
    if (err != 0) { err = errno; }
  }
  else {
    #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0
    // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
    // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( )
    err = unix_madvise(start, size, MADV_DONTNEED);
    #else
    // decommit: just disable access (also used in debug and secure mode to trap on illegal access)
    err = mprotect(start, size, PROT_NONE);
    if (err != 0) { err = errno; }
    #endif
  }
  unix_mprotect_hint(err);
  return err;
}

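Note: the new unix decommit relies on madvise(MADV_DONTNEED) so the resident set shrinks immediately, and debug/secure builds additionally drop access with mprotect(PROT_NONE) so stray use traps. A standalone sketch of that pattern (plain POSIX, not mimalloc code; error handling kept minimal):

#include <sys/mman.h>
#include <stddef.h>

// Decommit a page-aligned range: release the physical pages now, and (optionally)
// make the range trap on access until it is explicitly recommitted.
static int decommit_range(void* start, size_t size, int trap_on_access) {
  int err = madvise(start, size, MADV_DONTNEED);   // pages are released, the mapping stays
  if (err != 0) return err;
  if (trap_on_access) {
    err = mprotect(start, size, PROT_NONE);        // recommit later with PROT_READ|PROT_WRITE
  }
  return err;
}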
int _mi_prim_reset(void* start, size_t size) {
|
||||
// We try to use `MADV_FREE` as that is the fastest. A drawback though is that it
|
||||
// will not reduce the `rss` stats in tools like `top` even though the memory is available
|
||||
// to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by
|
||||
// default `MADV_DONTNEED` is used though.
|
||||
#if defined(MADV_FREE)
|
||||
static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE);
|
||||
int oadvice = (int)mi_atomic_load_relaxed(&advice);
|
||||
|
@ -426,8 +437,9 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co
|
|||
}
|
||||
#endif
|
||||
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
|
||||
bool is_large = true;
|
||||
*is_zero = true;
|
||||
*addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
|
||||
if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
|
||||
unsigned long numa_mask = (1UL << numa_node);
|
||||
|
@ -445,8 +457,9 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, vo
|
|||
|
||||
#else
|
||||
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
|
||||
MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
|
||||
*is_zero = false;
|
||||
*addr = NULL;
|
||||
return ENOMEM;
|
||||
}
|
||||
|
@ -610,11 +623,19 @@ void _mi_prim_process_info(mi_process_info_t* pinfo)
|
|||
pinfo->page_faults = 0;
|
||||
#elif defined(__APPLE__)
|
||||
pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes
|
||||
#ifdef MACH_TASK_BASIC_INFO
|
||||
struct mach_task_basic_info info;
|
||||
mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
|
||||
if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
|
||||
pinfo->current_rss = (size_t)info.resident_size;
|
||||
}
|
||||
#else
|
||||
struct task_basic_info info;
|
||||
mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT;
|
||||
if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
|
||||
pinfo->current_rss = (size_t)info.resident_size;
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB
|
||||
#endif
|
||||
|
|
|
@ -21,6 +21,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
|
|||
config->alloc_granularity = 16;
|
||||
config->has_overcommit = false;
|
||||
config->must_free_whole = true;
|
||||
config->has_virtual_reserve = false;
|
||||
}
|
||||
|
||||
//---------------------------------------------
|
||||
|
@ -114,9 +115,10 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) {
|
|||
}
|
||||
|
||||
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
|
||||
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
|
||||
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
|
||||
MI_UNUSED(allow_large); MI_UNUSED(commit);
|
||||
*is_large = false;
|
||||
*is_zero = false;
|
||||
*addr = mi_prim_mem_grow(size, try_alignment);
|
||||
return (*addr != NULL ? 0 : ENOMEM);
|
||||
}
|
||||
|
@ -126,8 +128,15 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la
|
|||
// Commit/Reset/Protect
|
||||
//---------------------------------------------
|
||||
|
||||
int _mi_prim_commit(void* addr, size_t size, bool commit) {
|
||||
MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(commit);
|
||||
int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
|
||||
MI_UNUSED(addr); MI_UNUSED(size);
|
||||
*is_zero = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
|
||||
MI_UNUSED(addr); MI_UNUSED(size);
|
||||
*needs_recommit = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -146,8 +155,9 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) {
|
|||
// Huge pages and NUMA nodes
|
||||
//---------------------------------------------
|
||||
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
|
||||
MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
|
||||
*is_zero = true;
|
||||
*addr = NULL;
|
||||
return ENOSYS;
|
||||
}
|
||||
|
|
|
@ -113,6 +113,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
|
|||
{
|
||||
config->has_overcommit = false;
|
||||
config->must_free_whole = true;
|
||||
config->has_virtual_reserve = true;
|
||||
// get the page size
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
|
@ -142,7 +143,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
|
|||
pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode");
|
||||
FreeLibrary(hDll);
|
||||
}
|
||||
if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
|
||||
if (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
|
||||
win_enable_large_os_pages(&config->large_page_size);
|
||||
}
|
||||
}
|
||||
|
@ -239,10 +240,11 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW
|
|||
return p;
|
||||
}
|
||||
|
||||
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
|
||||
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
|
||||
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
|
||||
mi_assert_internal(commit || !allow_large);
|
||||
mi_assert_internal(try_alignment > 0);
|
||||
*is_zero = true;
|
||||
int flags = MEM_RESERVE;
|
||||
if (commit) { flags |= MEM_COMMIT; }
|
||||
*addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
|
||||
|
@ -257,26 +259,38 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la
|
|||
#pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit)
|
||||
#endif
|
||||
|
||||
int _mi_prim_commit(void* addr, size_t size, bool commit) {
|
||||
if (commit) {
|
||||
void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
return (p == addr ? 0 : (int)GetLastError());
|
||||
}
|
||||
else {
|
||||
BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
|
||||
return (ok ? 0 : (int)GetLastError());
|
||||
int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
|
||||
*is_zero = false;
|
||||
/*
|
||||
// zero'ing only happens on an initial commit... but checking upfront seems expensive..
|
||||
_MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo);
|
||||
if (VirtualQuery(addr, &meminfo, size) > 0) {
|
||||
if ((meminfo.State & MEM_COMMIT) == 0) {
|
||||
*is_zero = true;
|
||||
}
|
||||
}
|
||||
*/
|
||||
// commit
|
||||
void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
if (p == NULL) return (int)GetLastError();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
  BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
  *needs_recommit = true;  // for safety, assume always decommitted even in the case of an error.
  return (ok ? 0 : (int)GetLastError());
}

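Note: on Windows the commit/decommit split maps onto VirtualAlloc(MEM_COMMIT) and VirtualFree(MEM_DECOMMIT); a decommitted range loses its contents, hence needs_recommit is always set. A minimal standalone sketch of the cycle (not mimalloc code):

#include <windows.h>

// Reserve, commit, decommit, and release a 64 KiB region.
static void commit_cycle(void) {
  SIZE_T size = 64 * 1024;
  void* p = VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_NOACCESS);
  if (p == NULL) return;
  VirtualAlloc(p, size, MEM_COMMIT, PAGE_READWRITE);   // commit: pages are zeroed on first commit
  VirtualFree(p, size, MEM_DECOMMIT);                  // decommit: contents are gone, must recommit before reuse
  VirtualFree(p, 0, MEM_RELEASE);                      // release the whole reservation
}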
|
||||
int _mi_prim_reset(void* addr, size_t size) {
|
||||
void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
|
||||
mi_assert_internal(p == addr);
|
||||
#if 1
|
||||
if (p == addr && addr != NULL) {
|
||||
VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory from the working set
|
||||
#if 0
|
||||
if (p != NULL) {
|
||||
VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set
|
||||
}
|
||||
#endif
|
||||
return (p == addr ? 0 : (int)GetLastError());
|
||||
return (p != NULL ? 0 : (int)GetLastError());
|
||||
}
|
||||
|
||||
int _mi_prim_protect(void* addr, size_t size, bool protect) {
|
||||
|
@ -331,7 +345,8 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num
|
|||
return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE);
|
||||
}
|
||||
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
|
||||
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
|
||||
*is_zero = true;
|
||||
*addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node);
|
||||
return (*addr != NULL ? 0 : (int)GetLastError());
|
||||
}
|
||||
|
src/region.c
@ -1,501 +0,0 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2019-2020, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
|
||||
and the segment and huge object allocation by mimalloc. There may be multiple
|
||||
implementations of this (one could be the identity going directly to the OS,
|
||||
another could be a simple cache etc), but the current one uses large "regions".
|
||||
In contrast to the rest of mimalloc, the "regions" are shared between threads and
|
||||
need to be accessed using atomic operations.
|
||||
We need this memory layer between the raw OS calls because of:
|
||||
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
|
||||
to reuse memory effectively.
|
||||
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
|
||||
an OS allocation/free is still (much) too expensive relative to the accesses
|
||||
in that object :-( (`malloc-large` tests this). This means we need a cheaper
|
||||
way to reuse memory.
|
||||
3. This layer allows for NUMA aware allocation.
|
||||
|
||||
Possible issues:
|
||||
- (2) can potentially be addressed too with a small cache per thread which is much
|
||||
simpler. Generally though that requires shrinking of huge pages, and may overuse
|
||||
memory per thread. (and is not compatible with `sbrk`).
|
||||
- Since the current regions are per-process, we need atomic operations to
|
||||
claim blocks which may be contended
|
||||
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
|
||||
linearly. At what point will direct OS calls be faster? Is there a way to
|
||||
do this better without adding too much complexity?
|
||||
-----------------------------------------------------------------------------*/
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/atomic.h"
|
||||
|
||||
#include <string.h> // memset
|
||||
|
||||
#include "bitmap.h"
|
||||
|
||||
// os.c
|
||||
bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);
|
||||
|
||||
// Constants
|
||||
#if (MI_INTPTR_SIZE==8)
|
||||
#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map
|
||||
#elif (MI_INTPTR_SIZE==4)
|
||||
#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map
|
||||
#else
|
||||
#error "define the maximum heap space allowed for regions on this platform"
|
||||
#endif
|
||||
|
||||
#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS
|
||||
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
|
||||
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
|
||||
#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
|
||||
#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
|
||||
|
||||
// Region info
|
||||
typedef union mi_region_info_u {
|
||||
size_t value;
|
||||
struct {
|
||||
bool valid; // initialized?
|
||||
bool is_large:1; // allocated in fixed large/huge OS pages
|
||||
bool is_pinned:1; // pinned memory cannot be decommitted
|
||||
short numa_node; // the associated NUMA node (where -1 means no associated node)
|
||||
} x;
|
||||
} mi_region_info_t;
|
||||
|
||||
|
||||
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
|
||||
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
|
||||
typedef struct mem_region_s {
|
||||
_Atomic(size_t) info; // mi_region_info_t.value
|
||||
_Atomic(void*) start; // start of the memory area
|
||||
mi_bitmap_field_t in_use; // bit per in-use block
|
||||
mi_bitmap_field_t dirty; // track if non-zero per block
|
||||
mi_bitmap_field_t commit; // track if committed per block
|
||||
mi_bitmap_field_t reset; // track if reset per block
|
||||
_Atomic(size_t) arena_memid; // if allocated from a (huge page) arena
|
||||
_Atomic(size_t) padding; // round to 8 fields (needs to be atomic for msvc, see issue #508)
|
||||
} mem_region_t;
|
||||
|
||||
// The region map
|
||||
static mem_region_t regions[MI_REGION_MAX];
|
||||
|
||||
// Allocated regions
|
||||
static _Atomic(size_t) regions_count; // = 0;
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Utility functions
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
// Blocks (of 4MiB) needed for the given size.
|
||||
static size_t mi_region_block_count(size_t size) {
|
||||
return _mi_divide_up(size, MI_SEGMENT_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
|
||||
static size_t mi_good_commit_size(size_t size) {
|
||||
if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
|
||||
return _mi_align_up(size, _mi_os_large_page_size());
|
||||
}
|
||||
*/
|
||||
|
||||
// Return if a pointer points into a region reserved by us.
|
||||
mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
|
||||
if (p==NULL) return false;
|
||||
size_t count = mi_atomic_load_relaxed(®ions_count);
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start);
|
||||
if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
|
||||
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start);
|
||||
mi_assert_internal(start != NULL);
|
||||
return (start + (bit_idx * MI_SEGMENT_SIZE));
|
||||
}
|
||||
|
||||
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
  mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
  size_t idx = region - regions;
  mi_assert_internal(&regions[idx] == region);
  return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}

static size_t mi_memid_create_from_arena(size_t arena_memid) {
  return (arena_memid << 1) | 1;
}

static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
  if ((id&1)==1) {
    if (arena_memid != NULL) *arena_memid = (id>>1);
    return true;
  }
  else {
    size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
    *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
    *region = &regions[idx];
    return false;
  }
}

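Note: the removed region layer packed block provenance into a single size_t: arena ids carry a low tag bit of 1, while region ids store idx*MI_BITMAP_FIELD_BITS + bit_idx shifted left by one. A standalone round-trip check of that encoding (assumes MI_BITMAP_FIELD_BITS == 64 as on 64-bit builds):

#include <assert.h>
#include <stddef.h>

#define FIELD_BITS 64   // MI_BITMAP_FIELD_BITS on 64-bit platforms (assumption)

int main(void) {
  size_t region_idx = 3, bit_idx = 17;
  size_t id = (region_idx * FIELD_BITS + bit_idx) << 1;   // as in mi_memid_create
  assert((id & 1) == 0);                                  // even => region id, not arena
  assert((id >> 1) / FIELD_BITS == region_idx);           // mi_memid_is_arena recovers the region index
  assert((id >> 1) % FIELD_BITS == bit_idx);              // ...and the bit index
  size_t arena_id = ((size_t)42 << 1) | 1;                // as in mi_memid_create_from_arena(42)
  assert((arena_id & 1) == 1 && (arena_id >> 1) == 42);   // odd tag marks an arena memid
  return 0;
}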
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Allocate a region is allocated from the OS (or an arena)
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
||||
{
|
||||
// not out of regions yet?
|
||||
if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false;
|
||||
|
||||
// try to allocate a fresh region from the OS
|
||||
bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
|
||||
bool region_large = (commit && allow_large);
|
||||
bool is_zero = false;
|
||||
bool is_pinned = false;
|
||||
size_t arena_memid = 0;
|
||||
void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld);
|
||||
if (start == NULL) return false;
|
||||
mi_assert_internal(!(region_large && !allow_large));
|
||||
mi_assert_internal(!region_large || region_commit);
|
||||
|
||||
// claim a fresh slot
|
||||
const size_t idx = mi_atomic_increment_acq_rel(®ions_count);
|
||||
if (idx >= MI_REGION_MAX) {
|
||||
mi_atomic_decrement_acq_rel(®ions_count);
|
||||
_mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats);
|
||||
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB));
|
||||
return false;
|
||||
}
|
||||
|
||||
// allocated, initialize and claim the initial blocks
|
||||
mem_region_t* r = ®ions[idx];
|
||||
r->arena_memid = arena_memid;
|
||||
mi_atomic_store_release(&r->in_use, (size_t)0);
|
||||
mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
|
||||
mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
|
||||
mi_atomic_store_release(&r->reset, (size_t)0);
|
||||
*bit_idx = 0;
|
||||
_mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
|
||||
mi_atomic_store_ptr_release(void,&r->start, start);
|
||||
|
||||
// and share it
|
||||
mi_region_info_t info;
|
||||
info.value = 0; // initialize the full union to zero
|
||||
info.x.valid = true;
|
||||
info.x.is_large = region_large;
|
||||
info.x.is_pinned = is_pinned;
|
||||
info.x.numa_node = (short)_mi_os_numa_node(tld);
|
||||
mi_atomic_store_release(&r->info, info.value); // now make it available to others
|
||||
*region = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Try to claim blocks in suitable regions
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
|
||||
// initialized at all?
|
||||
mi_region_info_t info;
|
||||
info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info);
|
||||
if (info.value==0) return false;
|
||||
|
||||
// numa correct
|
||||
if (numa_node >= 0) { // use negative numa node to always succeed
|
||||
int rnode = info.x.numa_node;
|
||||
if (rnode >= 0 && rnode != numa_node) return false;
|
||||
}
|
||||
|
||||
// check allow-large
|
||||
if (!allow_large && info.x.is_large) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
||||
{
|
||||
// try all regions for a free slot
|
||||
const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed
|
||||
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
|
||||
for (size_t visited = 0; visited < count; visited++, idx++) {
|
||||
if (idx >= count) idx = 0; // wrap around
|
||||
mem_region_t* r = ®ions[idx];
|
||||
// if this region suits our demand (numa node matches, large OS page matches)
|
||||
if (mi_region_is_suitable(r, numa_node, allow_large)) {
|
||||
// then try to atomically claim a segment(s) in this region
|
||||
if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
|
||||
tld->region_idx = idx; // remember the last found position
|
||||
*region = r;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
|
||||
{
|
||||
mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
|
||||
mem_region_t* region;
|
||||
mi_bitmap_index_t bit_idx;
|
||||
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
|
||||
// try to claim in existing regions
|
||||
if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) {
|
||||
// otherwise try to allocate a fresh region and claim in there
|
||||
if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) {
|
||||
// out of regions or memory
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------
|
||||
// found a region and claimed `blocks` at `bit_idx`, initialize them now
|
||||
mi_assert_internal(region != NULL);
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx));
|
||||
|
||||
mi_region_info_t info;
|
||||
info.value = mi_atomic_load_acquire(®ion->info);
|
||||
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start);
|
||||
mi_assert_internal(!(info.x.is_large && !*large));
|
||||
mi_assert_internal(start != NULL);
|
||||
|
||||
*is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL);
|
||||
*large = info.x.is_large;
|
||||
*is_pinned = info.x.is_pinned;
|
||||
*memid = mi_memid_create(region, bit_idx);
|
||||
void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
|
||||
|
||||
// commit
|
||||
if (*commit) {
|
||||
// ensure commit
|
||||
bool any_uncommitted;
|
||||
_mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted);
|
||||
if (any_uncommitted) {
|
||||
mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
|
||||
bool commit_zero = false;
|
||||
if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) {
|
||||
// failed to commit! unclaim and return
|
||||
mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx);
|
||||
return NULL;
|
||||
}
|
||||
if (commit_zero) *is_zero = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// no need to commit, but check if already fully committed
|
||||
*commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx);
|
||||
}
|
||||
mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx));
|
||||
|
||||
// unreset reset blocks
|
||||
if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) {
|
||||
// some blocks are still reset
|
||||
mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
|
||||
mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
|
||||
mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx);
|
||||
if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
|
||||
bool reset_zero = false;
|
||||
_mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
|
||||
if (reset_zero) *is_zero = true;
|
||||
}
|
||||
}
|
||||
mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx));
|
||||
|
||||
#if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN
|
||||
if (*commit) { ((uint8_t*)p)[0] = 0; }
|
||||
#endif
|
||||
|
||||
// and return the allocation
|
||||
mi_assert_internal(p != NULL);
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Allocation
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
|
||||
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
|
||||
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
|
||||
{
|
||||
mi_assert_internal(memid != NULL && tld != NULL);
|
||||
mi_assert_internal(size > 0);
|
||||
*memid = 0;
|
||||
*is_zero = false;
|
||||
*is_pinned = false;
|
||||
bool default_large = false;
|
||||
if (large==NULL) large = &default_large; // ensure `large != NULL`
|
||||
if (size == 0) return NULL;
|
||||
size = _mi_align_up(size, _mi_os_page_size());
|
||||
|
||||
// allocate from regions if possible
|
||||
void* p = NULL;
|
||||
size_t arena_memid;
|
||||
const size_t blocks = mi_region_block_count(size);
|
||||
if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
|
||||
p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);
|
||||
if (p == NULL) {
|
||||
_mi_warning_message("unable to allocate from region: size %zu\n", size);
|
||||
}
|
||||
}
|
||||
if (p == NULL) {
|
||||
// and otherwise fall back to the OS
|
||||
p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld);
|
||||
*memid = mi_memid_create_from_arena(arena_memid);
|
||||
}
|
||||
|
||||
if (p != NULL) {
|
||||
mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0);
|
||||
#if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN
|
||||
if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed
|
||||
#endif
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Free
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
// Free previously allocated memory with a given id.
|
||||
void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
|
||||
mi_assert_internal(size > 0 && tld != NULL);
|
||||
if (p==NULL) return;
|
||||
if (size==0) return;
|
||||
size = _mi_align_up(size, _mi_os_page_size());
|
||||
|
||||
size_t arena_memid = 0;
|
||||
mi_bitmap_index_t bit_idx;
|
||||
mem_region_t* region;
|
||||
if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) {
|
||||
// was a direct arena allocation, pass through
|
||||
_mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats);
|
||||
}
|
||||
else {
|
||||
// allocated in a region
|
||||
mi_assert_internal(align_offset == 0);
|
||||
mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
|
||||
const size_t blocks = mi_region_block_count(size);
|
||||
mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
|
||||
mi_region_info_t info;
|
||||
info.value = mi_atomic_load_acquire(®ion->info);
|
||||
mi_assert_internal(info.value != 0);
|
||||
void* blocks_start = mi_region_blocks_start(region, bit_idx);
|
||||
mi_assert_internal(blocks_start == p); // not a pointer in our area?
|
||||
mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
|
||||
if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
|
||||
|
||||
// committed?
|
||||
if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
|
||||
_mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL);
|
||||
}
|
||||
|
||||
if (any_reset) {
|
||||
// set the is_reset bits if any pages were reset
|
||||
_mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL);
|
||||
}
|
||||
|
||||
// reset the blocks to reduce the working set.
|
||||
if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
|
||||
&& (mi_option_is_enabled(mi_option_eager_commit) ||
|
||||
mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
|
||||
{
|
||||
bool any_unreset;
|
||||
_mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset);
|
||||
if (any_unreset) {
|
||||
_mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
|
||||
_mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
|
||||
}
|
||||
}
|
||||
|
||||
// and unclaim
|
||||
bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx);
|
||||
mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
collection
|
||||
-----------------------------------------------------------------------------*/
|
||||
void _mi_mem_collect(mi_os_tld_t* tld) {
|
||||
// free every region that has no segments in use.
|
||||
size_t rcount = mi_atomic_load_relaxed(®ions_count);
|
||||
for (size_t i = 0; i < rcount; i++) {
|
||||
mem_region_t* region = ®ions[i];
|
||||
if (mi_atomic_load_relaxed(®ion->info) != 0) {
|
||||
// if no segments used, try to claim the whole region
|
||||
size_t m = mi_atomic_load_relaxed(®ion->in_use);
|
||||
while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
|
||||
if (m == 0) {
|
||||
// on success, free the whole region
|
||||
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start);
|
||||
size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid);
|
||||
size_t commit = mi_atomic_load_relaxed(®ions[i].commit);
|
||||
memset((void*)®ions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning
|
||||
// and release the whole region
|
||||
mi_atomic_store_release(®ion->info, (size_t)0);
|
||||
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
|
||||
_mi_abandoned_await_readers(); // ensure no pending reads
|
||||
_mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Other
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
|
||||
if (mi_option_is_enabled(mi_option_reset_decommits)) {
|
||||
return _mi_os_decommit(p, size, tld->stats);
|
||||
}
|
||||
else {
|
||||
return _mi_os_reset(p, size, tld->stats);
|
||||
}
|
||||
}
|
||||
|
||||
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
||||
if (mi_option_is_enabled(mi_option_reset_decommits)) {
|
||||
return _mi_os_commit(p, size, is_zero, tld->stats);
|
||||
}
|
||||
else {
|
||||
return _mi_os_unreset(p, size, is_zero, tld->stats);
|
||||
}
|
||||
}
|
||||
|
||||
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
||||
return _mi_os_commit(p, size, is_zero, tld->stats);
|
||||
}
|
||||
|
||||
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
|
||||
return _mi_os_decommit(p, size, tld->stats);
|
||||
}
|
||||
|
||||
bool _mi_mem_protect(void* p, size_t size) {
|
||||
return _mi_os_protect(p, size);
|
||||
}
|
||||
|
||||
bool _mi_mem_unprotect(void* p, size_t size) {
|
||||
return _mi_os_unprotect(p, size);
|
||||
}
|
|
@ -1,423 +0,0 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2020, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Implements a cache of segments to avoid expensive OS calls and to reuse
|
||||
the commit_mask to optimize the commit/decommit calls.
|
||||
The full memory map of all segments is also implemented here.
|
||||
-----------------------------------------------------------------------------*/
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/atomic.h"
|
||||
|
||||
#include "./bitmap.h" // atomic bitmap
|
||||
|
||||
//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache
|
||||
|
||||
#define MI_CACHE_FIELDS (16)
|
||||
#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit
|
||||
|
||||
#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX)
|
||||
#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes
|
||||
|
||||
typedef struct mi_cache_slot_s {
|
||||
void* p;
|
||||
size_t memid;
|
||||
bool is_pinned;
|
||||
mi_commit_mask_t commit_mask;
|
||||
mi_commit_mask_t decommit_mask;
|
||||
_Atomic(mi_msecs_t) expire;
|
||||
} mi_cache_slot_t;
|
||||
|
||||
static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0
|
||||
|
||||
static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available!
|
||||
static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };
|
||||
static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free
|
||||
|
||||
static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) {
|
||||
mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg);
|
||||
mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
|
||||
return _mi_arena_memid_is_suitable(slot->memid, req_arena_id);
|
||||
}
|
||||
|
||||
mi_decl_noinline static void* mi_segment_cache_pop_ex(
|
||||
bool all_suitable,
|
||||
size_t size, mi_commit_mask_t* commit_mask,
|
||||
mi_commit_mask_t* decommit_mask, bool large_allowed,
|
||||
bool* large, bool* is_pinned, bool* is_zero,
|
||||
mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld)
|
||||
{
|
||||
#ifdef MI_CACHE_DISABLE
|
||||
return NULL;
|
||||
#else
|
||||
|
||||
// only segment blocks
|
||||
if (size != MI_SEGMENT_SIZE) return NULL;
|
||||
|
||||
// numa node determines start field
|
||||
const int numa_node = _mi_os_numa_node(tld);
|
||||
size_t start_field = 0;
|
||||
if (numa_node > 0) {
|
||||
start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
|
||||
if (start_field >= MI_CACHE_FIELDS) start_field = 0;
|
||||
}
|
||||
|
||||
// find an available slot and make it unavailable
|
||||
mi_bitmap_index_t bitidx = 0;
|
||||
bool claimed = false;
|
||||
mi_arena_id_t req_arena_id = _req_arena_id;
|
||||
mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache?
|
||||
|
||||
if (large_allowed) { // large allowed?
|
||||
claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
|
||||
if (claimed) *large = true;
|
||||
}
|
||||
if (!claimed) {
|
||||
claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
|
||||
if (claimed) *large = false;
|
||||
}
|
||||
|
||||
if (!claimed) return NULL;
|
||||
|
||||
// no longer available but still in-use
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx));
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx));
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
|
||||
|
||||
// found a slot
|
||||
mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
|
||||
void* p = slot->p;
|
||||
*memid = slot->memid;
|
||||
*is_pinned = slot->is_pinned;
|
||||
*is_zero = false;
|
||||
*commit_mask = slot->commit_mask;
|
||||
*decommit_mask = slot->decommit_mask;
|
||||
slot->p = NULL;
|
||||
mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0);
|
||||
|
||||
// mark the slot as free again
|
||||
_mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
|
||||
return p;
|
||||
#endif
|
||||
}
|
||||
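The pop above relies on _mi_bitmap_try_find_from_claim_pred to atomically flip a 0 bit (available) to a 1 bit (claimed) without locks. A single-field sketch of that claim pattern using a plain fetch-or loop (hypothetical helper, GCC/Clang __builtin_ctzll assumed; not mimalloc's actual bitmap code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool try_claim_bit(_Atomic uint64_t* field, size_t* bit_idx) {
  uint64_t map = atomic_load_explicit(field, memory_order_relaxed);
  while (map != UINT64_MAX) {                          // some bit is still 0 (free)
    size_t idx = (size_t)__builtin_ctzll(~map);        // lowest free bit
    uint64_t mask = (uint64_t)1 << idx;
    uint64_t prev = atomic_fetch_or_explicit(field, mask, memory_order_acquire);
    if ((prev & mask) == 0) { *bit_idx = idx; return true; }  // we set it first: claimed
    map = prev | mask;                                 // lost the race; retry on a fresh view
  }
  return false;                                        // field is full
}

The real implementation generalizes this over MI_CACHE_FIELDS fields and additionally checks the suitability predicate for each candidate bit before claiming it.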
|
||||
|
||||
mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld)
|
||||
{
|
||||
return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld);
|
||||
}
|
||||
|
||||
static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats)
|
||||
{
|
||||
if (mi_commit_mask_is_empty(cmask)) {
|
||||
// nothing
|
||||
}
|
||||
else if (mi_commit_mask_is_full(cmask)) {
|
||||
// decommit the whole in one call
|
||||
_mi_os_decommit(p, total, stats);
|
||||
}
|
||||
else {
|
||||
// decommit parts
|
||||
mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0);
|
||||
size_t part = total/MI_COMMIT_MASK_BITS;
|
||||
size_t idx;
|
||||
size_t count;
|
||||
mi_commit_mask_foreach(cmask, idx, count) {
|
||||
void* start = (uint8_t*)p + (idx*part);
|
||||
size_t size = count*part;
|
||||
_mi_os_decommit(start, size, stats);
|
||||
}
|
||||
mi_commit_mask_foreach_end()
|
||||
}
|
||||
mi_commit_mask_create_empty(cmask);
|
||||
}
|
||||
|
||||
#define MI_MAX_PURGE_PER_PUSH (4)
|
||||
|
||||
static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld)
|
||||
{
|
||||
MI_UNUSED(tld);
|
||||
if (!mi_option_is_enabled(mi_option_allow_decommit)) return;
|
||||
mi_msecs_t now = _mi_clock_now();
|
||||
size_t purged = 0;
|
||||
const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */);
|
||||
size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ );
|
||||
for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots
|
||||
if (idx >= MI_CACHE_MAX) idx = 0; // wrap
|
||||
mi_cache_slot_t* slot = &cache[idx];
|
||||
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);
|
||||
if (expire != 0 && (force || now >= expire)) { // racy read
|
||||
// seems expired, first claim it from available
|
||||
purged++;
|
||||
mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
|
||||
if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways
|
||||
// it was available, we claimed it (and made it unavailable)
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx));
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx));
|
||||
// we can now access it safely
|
||||
expire = mi_atomic_loadi64_acquire(&slot->expire);
|
||||
if (expire != 0 && (force || now >= expire)) { // safe read
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
|
||||
// still expired, decommit it
|
||||
mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
|
||||
mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask));
|
||||
_mi_abandoned_await_readers(); // wait until safe to decommit
|
||||
// decommit committed parts
|
||||
// TODO: instead of decommit, we could also free to the OS?
|
||||
mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats);
|
||||
mi_commit_mask_create_empty(&slot->decommit_mask);
|
||||
}
|
||||
_mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop
|
||||
}
|
||||
if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) {
|
||||
if (force) {
|
||||
// called on `mi_collect(true)` but not on thread termination
|
||||
_mi_segment_cache_free_all(tld);
|
||||
}
|
||||
else {
|
||||
mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld);
|
||||
}
|
||||
}
|
||||
|
||||
void _mi_segment_cache_free_all(mi_os_tld_t* tld) {
|
||||
mi_commit_mask_t commit_mask;
|
||||
mi_commit_mask_t decommit_mask;
|
||||
bool is_pinned;
|
||||
bool is_zero;
|
||||
bool is_large;
|
||||
size_t memid;
|
||||
const size_t size = MI_SEGMENT_SIZE;
|
||||
void* p;
|
||||
do {
|
||||
// keep popping and freeing the memory
|
||||
p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask,
|
||||
true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld);
|
||||
if (p != NULL) {
|
||||
size_t csize = _mi_commit_mask_committed_size(&commit_mask, size);
|
||||
if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); }
|
||||
_mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats);
|
||||
}
|
||||
} while (p != NULL);
|
||||
}
|
||||
|
||||
mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
|
||||
{
|
||||
#ifdef MI_CACHE_DISABLE
|
||||
return false;
|
||||
#else
|
||||
|
||||
// purge expired entries
|
||||
mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld);
|
||||
|
||||
// only cache normal segment blocks
|
||||
if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false;
|
||||
|
||||
// Also do not cache arena allocated segments that cannot be decommitted. (as arena allocation is fast)
|
||||
// This is a common case with reserved huge OS pages.
|
||||
//
|
||||
// (note: we could also allow segments that are already fully decommitted but that never happens
|
||||
// as the first slice is always committed (for the segment metadata))
|
||||
if (!_mi_arena_is_os_allocated(memid) && is_pinned) return false;
|
||||
|
||||
// numa node determines start field
|
||||
int numa_node = _mi_os_numa_node(NULL);
|
||||
size_t start_field = 0;
|
||||
if (numa_node > 0) {
|
||||
start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node;
|
||||
if (start_field >= MI_CACHE_FIELDS) start_field = 0;
|
||||
}
|
||||
|
||||
// find an available slot
|
||||
mi_bitmap_index_t bitidx;
|
||||
bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx);
|
||||
if (!claimed) return false;
|
||||
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx));
|
||||
mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx));
|
||||
#if MI_DEBUG>1
|
||||
if (is_pinned || is_large) {
|
||||
mi_assert_internal(mi_commit_mask_is_full(commit_mask));
|
||||
}
|
||||
#endif
|
||||
|
||||
// set the slot
|
||||
mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
|
||||
slot->p = start;
|
||||
slot->memid = memid;
|
||||
slot->is_pinned = is_pinned;
|
||||
mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
|
||||
slot->commit_mask = *commit_mask;
|
||||
slot->decommit_mask = *decommit_mask;
|
||||
if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) {
|
||||
long delay = mi_option_get(mi_option_segment_decommit_delay);
|
||||
if (delay == 0) {
|
||||
_mi_abandoned_await_readers(); // wait until safe to decommit
|
||||
mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats);
|
||||
mi_commit_mask_create_empty(&slot->decommit_mask);
|
||||
}
|
||||
else {
|
||||
mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay);
|
||||
}
|
||||
}
|
||||
|
||||
// make it available
|
||||
_mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx);
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
The following functions are to reliably find the segment or
|
||||
block that encompasses any pointer p (or NULL if it is not
|
||||
in any of our segments).
|
||||
We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB)
|
||||
set to 1 if it contains the segment meta data.
|
||||
----------------------------------------------------------- */
|
||||
|
||||
|
||||
#if (MI_INTPTR_SIZE==8)
|
||||
#define MI_MAX_ADDRESS ((size_t)40 << 40) // 20TB
|
||||
#else
|
||||
#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb
|
||||
#endif
|
||||
|
||||
#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
|
||||
#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
|
||||
#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
|
||||
|
||||
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments
|
||||
|
||||
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
|
||||
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
|
||||
if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
|
||||
*bitidx = 0;
|
||||
return MI_SEGMENT_MAP_WSIZE;
|
||||
}
|
||||
else {
|
||||
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
|
||||
*bitidx = segindex % MI_INTPTR_BITS;
|
||||
const size_t mapindex = segindex / MI_INTPTR_BITS;
|
||||
mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
|
||||
return mapindex;
|
||||
}
|
||||
}
|
||||
|
||||
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
|
||||
size_t bitidx;
|
||||
size_t index = mi_segment_map_index_of(segment, &bitidx);
|
||||
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
|
||||
if (index==MI_SEGMENT_MAP_WSIZE) return;
|
||||
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
|
||||
uintptr_t newmask;
|
||||
do {
|
||||
newmask = (mask | ((uintptr_t)1 << bitidx));
|
||||
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
|
||||
}
|
||||
|
||||
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
|
||||
size_t bitidx;
|
||||
size_t index = mi_segment_map_index_of(segment, &bitidx);
|
||||
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
|
||||
if (index == MI_SEGMENT_MAP_WSIZE) return;
|
||||
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
|
||||
uintptr_t newmask;
|
||||
do {
|
||||
newmask = (mask & ~((uintptr_t)1 << bitidx));
|
||||
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
|
||||
}
|
||||
|
||||
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
|
||||
static mi_segment_t* _mi_segment_of(const void* p) {
|
||||
if (p == NULL) return NULL;
|
||||
mi_segment_t* segment = _mi_ptr_segment(p);
|
||||
mi_assert_internal(segment != NULL);
|
||||
size_t bitidx;
|
||||
size_t index = mi_segment_map_index_of(segment, &bitidx);
|
||||
// fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
|
||||
const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
|
||||
if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
|
||||
return segment; // yes, allocated by us
|
||||
}
|
||||
if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
|
||||
|
||||
// TODO: maintain max/min allocated range for more efficient rejection of invalid pointers?
|
||||
|
||||
// search downwards for the first segment in case it is an interior pointer
|
||||
// could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
|
||||
// valid huge objects
|
||||
// note: we could maintain a lowest index to speed up the path for invalid pointers?
|
||||
size_t lobitidx;
|
||||
size_t loindex;
|
||||
uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
|
||||
if (lobits != 0) {
|
||||
loindex = index;
|
||||
lobitidx = mi_bsr(lobits); // lobits != 0
|
||||
}
|
||||
else if (index == 0) {
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
mi_assert_internal(index > 0);
|
||||
uintptr_t lomask = mask;
|
||||
loindex = index;
|
||||
do {
|
||||
loindex--;
|
||||
lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
|
||||
} while (lomask != 0 && loindex > 0);
|
||||
if (lomask == 0) return NULL;
|
||||
lobitidx = mi_bsr(lomask); // lomask != 0
|
||||
}
|
||||
mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
|
||||
// take difference as the addresses could be larger than the MAX_ADDRESS space.
|
||||
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
|
||||
segment = (mi_segment_t*)((uint8_t*)segment - diff);
|
||||
|
||||
if (segment == NULL) return NULL;
|
||||
mi_assert_internal((void*)segment < p);
|
||||
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
|
||||
mi_assert_internal(cookie_ok);
|
||||
if mi_unlikely(!cookie_ok) return NULL;
|
||||
if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
|
||||
mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
|
||||
return segment;
|
||||
}
|
||||
|
||||
// Is this a valid pointer in our heap?
|
||||
static bool mi_is_valid_pointer(const void* p) {
|
||||
return (_mi_segment_of(p) != NULL);
|
||||
}
|
||||
|
||||
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
|
||||
return mi_is_valid_pointer(p);
|
||||
}
|
||||
|
||||
/*
|
||||
// Return the full segment range belonging to a pointer
|
||||
static void* mi_segment_range_of(const void* p, size_t* size) {
|
||||
mi_segment_t* segment = _mi_segment_of(p);
|
||||
if (segment == NULL) {
|
||||
if (size != NULL) *size = 0;
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
if (size != NULL) *size = segment->segment_size;
|
||||
return segment;
|
||||
}
|
||||
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
|
||||
mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
|
||||
mi_reset_delayed(tld);
|
||||
mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
|
||||
return page;
|
||||
}
|
||||
*/
|
153
src/segment-map.c
Normal file
|
@ -0,0 +1,153 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2019-2023, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
The following functions are to reliably find the segment or
|
||||
block that encompasses any pointer p (or NULL if it is not
|
||||
in any of our segments).
|
||||
We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB)
|
||||
set to 1 if it contains the segment metadata.
|
||||
----------------------------------------------------------- */
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/atomic.h"
|
||||
|
||||
#if (MI_INTPTR_SIZE==8)
|
||||
#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas)
|
||||
#else
|
||||
#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb
|
||||
#endif
|
||||
|
||||
#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
|
||||
#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
|
||||
#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
|
||||
|
||||
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments
|
||||
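Plugging in the constants above (and taking the 64MiB segment size from the comment at face value), the map stays tiny. A quick stand-alone check of the arithmetic, with hypothetical constants rather than the mimalloc headers:

#include <stdio.h>

int main(void) {
  const unsigned long long max_address  = 40ULL << 40;   // 40 TiB, as MI_MAX_ADDRESS above
  const unsigned long long segment_size = 64ULL << 20;   // 64 MiB (assumed MI_SEGMENT_SIZE)
  const unsigned long long map_bits  = max_address / segment_size;  // 655360 bits
  const unsigned long long map_bytes = map_bits / 8;                // 81920 bytes (~80 KiB total)
  printf("map: %llu bits, %llu bytes, %.1f KiB per TiB\n",
         map_bits, map_bytes, (double)map_bytes / 40.0 / 1024.0);   // ~2.0 KiB per TiB
  return 0;
}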
|
||||
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
|
||||
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
|
||||
if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
|
||||
*bitidx = 0;
|
||||
return MI_SEGMENT_MAP_WSIZE;
|
||||
}
|
||||
else {
|
||||
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
|
||||
*bitidx = segindex % MI_INTPTR_BITS;
|
||||
const size_t mapindex = segindex / MI_INTPTR_BITS;
|
||||
mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
|
||||
return mapindex;
|
||||
}
|
||||
}
|
||||
|
||||
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
|
||||
size_t bitidx;
|
||||
size_t index = mi_segment_map_index_of(segment, &bitidx);
|
||||
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
|
||||
if (index==MI_SEGMENT_MAP_WSIZE) return;
|
||||
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
|
||||
uintptr_t newmask;
|
||||
do {
|
||||
newmask = (mask | ((uintptr_t)1 << bitidx));
|
||||
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
|
||||
}
|
||||
|
||||
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
|
||||
size_t bitidx;
|
||||
size_t index = mi_segment_map_index_of(segment, &bitidx);
|
||||
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
|
||||
if (index == MI_SEGMENT_MAP_WSIZE) return;
|
||||
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
|
||||
uintptr_t newmask;
|
||||
do {
|
||||
newmask = (mask & ~((uintptr_t)1 << bitidx));
|
||||
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
|
||||
}
|
||||
|
||||
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
|
||||
static mi_segment_t* _mi_segment_of(const void* p) {
|
||||
if (p == NULL) return NULL;
|
||||
mi_segment_t* segment = _mi_ptr_segment(p);
|
||||
mi_assert_internal(segment != NULL);
|
||||
size_t bitidx;
|
||||
size_t index = mi_segment_map_index_of(segment, &bitidx);
|
||||
// fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
|
||||
const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
|
||||
if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
|
||||
return segment; // yes, allocated by us
|
||||
}
|
||||
if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
|
||||
|
||||
// TODO: maintain max/min allocated range for more efficient rejection of invalid pointers?
|
||||
|
||||
// search downwards for the first segment in case it is an interior pointer
|
||||
// could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
|
||||
// valid huge objects
|
||||
// note: we could maintain a lowest index to speed up the path for invalid pointers?
|
||||
size_t lobitidx;
|
||||
size_t loindex;
|
||||
uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
|
||||
if (lobits != 0) {
|
||||
loindex = index;
|
||||
lobitidx = mi_bsr(lobits); // lobits != 0
|
||||
}
|
||||
else if (index == 0) {
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
mi_assert_internal(index > 0);
|
||||
uintptr_t lomask = mask;
|
||||
loindex = index;
|
||||
do {
|
||||
loindex--;
|
||||
lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
|
||||
} while (lomask != 0 && loindex > 0);
|
||||
if (lomask == 0) return NULL;
|
||||
lobitidx = mi_bsr(lomask); // lomask != 0
|
||||
}
|
||||
mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
|
||||
// take difference as the addresses could be larger than the MAX_ADDRESS space.
|
||||
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
|
||||
segment = (mi_segment_t*)((uint8_t*)segment - diff);
|
||||
|
||||
if (segment == NULL) return NULL;
|
||||
mi_assert_internal((void*)segment < p);
|
||||
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
|
||||
mi_assert_internal(cookie_ok);
|
||||
if mi_unlikely(!cookie_ok) return NULL;
|
||||
if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
|
||||
mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
|
||||
return segment;
|
||||
}
|
||||
|
||||
// Is this a valid pointer in our heap?
|
||||
static bool mi_is_valid_pointer(const void* p) {
|
||||
return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p)));
|
||||
}
|
||||
|
||||
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
|
||||
return mi_is_valid_pointer(p);
|
||||
}
|
||||
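Since mi_is_in_heap_region is part of the public API, the map can also be queried from user code. A small usage sketch (assuming mimalloc is linked and <mimalloc.h> is on the include path):

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = mi_malloc(64);
  int local = 0;
  printf("mi_malloc block in heap region: %d\n", mi_is_in_heap_region(p));      // expected: 1
  printf("stack address in heap region:   %d\n", mi_is_in_heap_region(&local)); // typically: 0
  mi_free(p);
  return 0;
}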
|
||||
/*
|
||||
// Return the full segment range belonging to a pointer
|
||||
static void* mi_segment_range_of(const void* p, size_t* size) {
|
||||
mi_segment_t* segment = _mi_segment_of(p);
|
||||
if (segment == NULL) {
|
||||
if (size != NULL) *size = 0;
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
if (size != NULL) *size = segment->segment_size;
|
||||
return segment;
|
||||
}
|
||||
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
|
||||
mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
|
||||
mi_reset_delayed(tld);
|
||||
mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
|
||||
return page;
|
||||
}
|
||||
*/
|
322
src/segment.c
|
@ -11,9 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#include <string.h> // memset
|
||||
#include <stdio.h>
|
||||
|
||||
#define MI_PAGE_HUGE_ALIGN (256*1024)
|
||||
#define MI_PAGE_HUGE_ALIGN (256*1024)
|
||||
|
||||
static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats);
|
||||
static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats);
|
||||
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
@ -257,7 +257,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
|
|||
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
|
||||
mi_assert_internal(segment->abandoned <= segment->used);
|
||||
mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id());
|
||||
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks
|
||||
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // can only decommit committed blocks
|
||||
//mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0);
|
||||
mi_slice_t* slice = &segment->slices[0];
|
||||
const mi_slice_t* end = mi_segment_slices_end(segment);
|
||||
|
@ -389,21 +389,14 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
|
|||
_mi_os_unprotect(end, os_pagesize);
|
||||
}
|
||||
|
||||
// purge delayed decommits now? (no, leave it to the cache)
|
||||
// mi_segment_delayed_decommit(segment,true,tld->stats);
|
||||
// purge delayed decommits now? (no, leave it to the arena)
|
||||
// mi_segment_try_purge(segment,true,tld->stats);
|
||||
|
||||
// _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);
|
||||
const size_t size = mi_segment_size(segment);
|
||||
if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache
|
||||
!_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os))
|
||||
{
|
||||
if (!segment->mem_is_pinned) {
|
||||
const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
|
||||
if (csize > 0) { _mi_stat_decrease(&_mi_stats_main.committed, csize); }
|
||||
}
|
||||
_mi_abandoned_await_readers(); // wait until safe to free
|
||||
_mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats);
|
||||
}
|
||||
const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
|
||||
|
||||
_mi_abandoned_await_readers(); // wait until safe to free
|
||||
_mi_arena_free(segment, mi_segment_size(segment), csize, segment->memid, tld->stats);
|
||||
}
|
||||
|
||||
// called by threads that are terminating
|
||||
|
@ -467,61 +460,81 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin
|
|||
mi_commit_mask_create(bitidx, bitcount, cm);
|
||||
}
|
||||
|
||||
static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
|
||||
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask));
|
||||
|
||||
static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) {
|
||||
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask));
|
||||
|
||||
// commit liberal, but decommit conservative
|
||||
// commit liberal
|
||||
uint8_t* start = NULL;
|
||||
size_t full_size = 0;
|
||||
mi_commit_mask_t mask;
|
||||
mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask);
|
||||
if (mi_commit_mask_is_empty(&mask) || full_size==0) return true;
|
||||
mi_segment_commit_mask(segment, false /* conservative? */, p, size, &start, &full_size, &mask);
|
||||
if (mi_commit_mask_is_empty(&mask) || full_size == 0) return true;
|
||||
|
||||
if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) {
|
||||
if (!mi_commit_mask_all_set(&segment->commit_mask, &mask)) {
|
||||
// committing
|
||||
bool is_zero = false;
|
||||
mi_commit_mask_t cmask;
|
||||
mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask);
|
||||
_mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap
|
||||
if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false;
|
||||
mi_commit_mask_set(&segment->commit_mask, &mask);
|
||||
if (!_mi_os_commit(start, full_size, &is_zero, stats)) return false;
|
||||
mi_commit_mask_set(&segment->commit_mask, &mask);
|
||||
}
|
||||
|
||||
// increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon.
|
||||
if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) {
|
||||
segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay);
|
||||
}
|
||||
else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) {
|
||||
mi_assert_internal((void*)start != (void*)segment);
|
||||
//mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask));
|
||||
|
||||
mi_commit_mask_t cmask;
|
||||
mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask);
|
||||
_mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap
|
||||
if (segment->allow_decommit) {
|
||||
_mi_os_decommit(start, full_size, stats); // ok if this fails
|
||||
}
|
||||
mi_commit_mask_clear(&segment->commit_mask, &mask);
|
||||
}
|
||||
// increase expiration of reusing part of the delayed decommit
|
||||
if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) {
|
||||
segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay);
|
||||
}
|
||||
// always undo delayed decommits
|
||||
mi_commit_mask_clear(&segment->decommit_mask, &mask);
|
||||
// always clear any delayed purges in our range (as that range is now committed)
|
||||
mi_commit_mask_clear(&segment->purge_mask, &mask);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
|
||||
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask));
|
||||
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask));
|
||||
// note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow
|
||||
if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed
|
||||
if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed
|
||||
mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
|
||||
return mi_segment_commitx(segment,true,p,size,stats);
|
||||
return mi_segment_commit(segment, p, size, stats);
|
||||
}
|
||||
|
||||
static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
|
||||
if (!segment->allow_decommit) return;
|
||||
if (mi_option_get(mi_option_decommit_delay) == 0) {
|
||||
mi_segment_commitx(segment, false, p, size, stats);
|
||||
static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
|
||||
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask));
|
||||
if (!segment->allow_purge) return true;
|
||||
|
||||
// purge conservative
|
||||
uint8_t* start = NULL;
|
||||
size_t full_size = 0;
|
||||
mi_commit_mask_t mask;
|
||||
mi_segment_commit_mask(segment, true /* conservative? */, p, size, &start, &full_size, &mask);
|
||||
if (mi_commit_mask_is_empty(&mask) || full_size==0) return true;
|
||||
|
||||
if (mi_commit_mask_any_set(&segment->commit_mask, &mask)) {
|
||||
// purging
|
||||
mi_assert_internal((void*)start != (void*)segment);
|
||||
mi_assert_internal(segment->allow_decommit);
|
||||
const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit
|
||||
if (decommitted) {
|
||||
mi_commit_mask_t cmask;
|
||||
mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask);
|
||||
_mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting
|
||||
mi_commit_mask_clear(&segment->commit_mask, &mask);
|
||||
}
|
||||
}
|
||||
|
||||
// always clear any scheduled purges in our range
|
||||
mi_commit_mask_clear(&segment->purge_mask, &mask);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) {
|
||||
if (!segment->allow_purge) return;
|
||||
|
||||
if (mi_option_get(mi_option_purge_delay) == 0) {
|
||||
mi_segment_purge(segment, p, size, stats);
|
||||
}
|
||||
else {
|
||||
// register for future decommit in the decommit mask
|
||||
// register for future purge in the purge mask
|
||||
uint8_t* start = NULL;
|
||||
size_t full_size = 0;
|
||||
mi_commit_mask_t mask;
|
||||
|
@ -529,39 +542,39 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_
|
|||
if (mi_commit_mask_is_empty(&mask) || full_size==0) return;
|
||||
|
||||
// update delayed commit
|
||||
mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask));
|
||||
mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask));
|
||||
mi_commit_mask_t cmask;
|
||||
mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more
|
||||
mi_commit_mask_set(&segment->decommit_mask, &cmask);
|
||||
mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only purge what is committed; span_free may try to decommit more
|
||||
mi_commit_mask_set(&segment->purge_mask, &cmask);
|
||||
mi_msecs_t now = _mi_clock_now();
|
||||
if (segment->decommit_expire == 0) {
|
||||
// no previous decommits, initialize now
|
||||
segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay);
|
||||
if (segment->purge_expire == 0) {
|
||||
// no previous purges, initialize now
|
||||
segment->purge_expire = now + mi_option_get(mi_option_purge_delay);
|
||||
}
|
||||
else if (segment->decommit_expire <= now) {
|
||||
// previous decommit mask already expired
|
||||
if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) {
|
||||
mi_segment_delayed_decommit(segment, true, stats);
|
||||
else if (segment->purge_expire <= now) {
|
||||
// previous purge mask already expired
|
||||
if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) {
|
||||
mi_segment_try_purge(segment, true, stats);
|
||||
}
|
||||
else {
|
||||
segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's
|
||||
segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of frees
|
||||
}
|
||||
}
|
||||
else {
|
||||
// previous decommit mask is not yet expired, increase the expiration by a bit.
|
||||
segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay);
|
||||
// previous purge mask is not yet expired, increase the expiration by a bit.
|
||||
segment->purge_expire += mi_option_get(mi_option_purge_extend_delay);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) {
|
||||
if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return;
|
||||
static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) {
|
||||
if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return;
|
||||
mi_msecs_t now = _mi_clock_now();
|
||||
if (!force && now < segment->decommit_expire) return;
|
||||
if (!force && now < segment->purge_expire) return;
|
||||
|
||||
mi_commit_mask_t mask = segment->decommit_mask;
|
||||
segment->decommit_expire = 0;
|
||||
mi_commit_mask_create_empty(&segment->decommit_mask);
|
||||
mi_commit_mask_t mask = segment->purge_mask;
|
||||
segment->purge_expire = 0;
|
||||
mi_commit_mask_create_empty(&segment->purge_mask);
|
||||
|
||||
size_t idx;
|
||||
size_t count;
|
||||
|
@ -570,11 +583,11 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st
|
|||
if (count > 0) {
|
||||
uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE);
|
||||
size_t size = count * MI_COMMIT_SIZE;
|
||||
mi_segment_commitx(segment, false, p, size, stats);
|
||||
mi_segment_purge(segment, p, size, stats);
|
||||
}
|
||||
}
|
||||
mi_commit_mask_foreach_end()
|
||||
mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask));
|
||||
mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask));
|
||||
}
|
||||
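The delays consulted by the purge path above are ordinary mimalloc options, so the behavior can be tuned or disabled without rebuilding. A possible configuration with illustrative values (mi_option_set and mi_collect are the public API; the negative-delay behavior follows from the allow_purge check later in this diff):

#include <mimalloc.h>

int main(void) {
  mi_option_set(mi_option_purge_delay, 100);    // wait ~100ms before purging freed spans
  // mi_option_set(mi_option_purge_delay, -1);  // a negative delay disables purging entirely
  void* p = mi_malloc(1 << 20);
  mi_free(p);
  mi_collect(true);                             // force a collection, purging eagerly
  return 0;
}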
|
||||
|
||||
|
@ -587,7 +600,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) {
|
|||
}
|
||||
|
||||
// note: can be called on abandoned segments
|
||||
static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) {
|
||||
static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) {
|
||||
mi_assert_internal(slice_index < segment->slice_entries);
|
||||
mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment)
|
||||
? NULL : mi_span_queue_for(slice_count,tld));
|
||||
|
@ -607,8 +620,8 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size
|
|||
}
|
||||
|
||||
// perhaps decommit
|
||||
if (allow_decommit) {
|
||||
mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats);
|
||||
if (allow_purge) {
|
||||
mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats);
|
||||
}
|
||||
|
||||
// and push it on the free page queue (if it was not a huge page)
|
||||
|
@ -726,7 +739,6 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i
|
|||
}
|
||||
|
||||
// and initialize the page
|
||||
page->is_reset = false;
|
||||
page->is_committed = true;
|
||||
segment->used++;
|
||||
return page;
|
||||
|
@ -740,7 +752,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz
|
|||
mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
|
||||
size_t next_index = mi_slice_index(slice) + slice_count;
|
||||
size_t next_count = slice->slice_count - slice_count;
|
||||
mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld);
|
||||
mi_segment_span_free(segment, next_index, next_count, false /* don't purge left-over part */, tld);
|
||||
slice->slice_count = (uint32_t)slice_count;
|
||||
}
|
||||
|
||||
|
@ -783,16 +795,13 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren
|
|||
Segment allocation
|
||||
----------------------------------------------------------- */
|
||||
|
||||
static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id,
|
||||
static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id,
|
||||
size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices,
|
||||
mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask,
|
||||
bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
|
||||
bool commit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
|
||||
|
||||
{
|
||||
// Allocate the segment from the OS
|
||||
bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
|
||||
bool is_pinned = false;
|
||||
size_t memid = 0;
|
||||
mi_memid_t memid;
|
||||
bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy
|
||||
size_t align_offset = 0;
|
||||
size_t alignment = MI_SEGMENT_ALIGN;
|
||||
|
||||
|
@ -806,48 +815,40 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment
|
|||
// recalculate due to potential guard pages
|
||||
*psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices);
|
||||
}
|
||||
const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE;
|
||||
mi_segment_t* segment = NULL;
|
||||
|
||||
// get from cache?
|
||||
if (page_alignment == 0) {
|
||||
segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld);
|
||||
const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE;
|
||||
mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, commit, allow_large, req_arena_id, &memid, os_tld);
|
||||
if (segment == NULL) {
|
||||
return NULL; // failed to allocate
|
||||
}
|
||||
|
||||
// ensure metadata part of the segment is committed
|
||||
mi_commit_mask_t commit_mask;
|
||||
if (memid.initially_committed) {
|
||||
mi_commit_mask_create_full(&commit_mask);
|
||||
}
|
||||
else {
|
||||
// at least commit the info slices
|
||||
const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
|
||||
mi_assert_internal(commit_needed>0);
|
||||
mi_commit_mask_create(0, commit_needed, &commit_mask);
|
||||
mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE);
|
||||
if (!_mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, NULL, tld->stats)) {
|
||||
_mi_arena_free(segment,segment_size,0,memid,tld->stats);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// get from OS
|
||||
if (segment==NULL) {
|
||||
segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, pcommit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld);
|
||||
if (segment == NULL) return NULL; // failed to allocate
|
||||
if (*pcommit) {
|
||||
mi_commit_mask_create_full(pcommit_mask);
|
||||
}
|
||||
else {
|
||||
mi_commit_mask_create_empty(pcommit_mask);
|
||||
}
|
||||
}
|
||||
mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
|
||||
|
||||
const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
|
||||
mi_assert_internal(commit_needed>0);
|
||||
mi_commit_mask_t commit_needed_mask;
|
||||
mi_commit_mask_create(0, commit_needed, &commit_needed_mask);
|
||||
if (!mi_commit_mask_all_set(pcommit_mask, &commit_needed_mask)) {
|
||||
// at least commit the info slices
|
||||
mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE);
|
||||
bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, is_zero, tld->stats);
|
||||
if (!ok) return NULL; // failed to commit
|
||||
mi_commit_mask_set(pcommit_mask, &commit_needed_mask);
|
||||
}
|
||||
else if (*is_zero) {
|
||||
// track zero initialization for valgrind
|
||||
mi_track_mem_defined(segment, commit_needed * MI_COMMIT_SIZE);
|
||||
}
|
||||
segment->memid = memid;
|
||||
segment->mem_is_pinned = is_pinned;
|
||||
segment->mem_is_large = mem_large;
|
||||
segment->mem_is_committed = mi_commit_mask_is_full(pcommit_mask);
|
||||
segment->mem_alignment = alignment;
|
||||
segment->mem_align_offset = align_offset;
|
||||
segment->allow_decommit = !memid.is_pinned;
|
||||
segment->allow_purge = segment->allow_decommit && (mi_option_get(mi_option_purge_delay) >= 0);
|
||||
segment->segment_size = segment_size;
|
||||
segment->commit_mask = commit_mask;
|
||||
segment->purge_expire = 0;
|
||||
mi_commit_mask_create_empty(&segment->purge_mask);
|
||||
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan
|
||||
|
||||
mi_segments_track_size((long)(segment_size), tld);
|
||||
_mi_segment_map_allocated_at(segment);
|
||||
return segment;
|
||||
|
@ -870,49 +871,21 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi
|
|||
tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
|
||||
const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
|
||||
bool commit = eager || (required > 0);
|
||||
bool is_zero = false;
|
||||
|
||||
mi_commit_mask_t commit_mask;
|
||||
mi_commit_mask_t decommit_mask;
|
||||
mi_commit_mask_create_empty(&commit_mask);
|
||||
mi_commit_mask_create_empty(&decommit_mask);
|
||||
|
||||
|
||||
// Allocate the segment from the OS
|
||||
mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id,
|
||||
&segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask,
|
||||
&is_zero, &commit, tld, os_tld);
|
||||
&segment_slices, &pre_size, &info_slices, commit, tld, os_tld);
|
||||
if (segment == NULL) return NULL;
|
||||
|
||||
// zero the segment info? -- not always needed as it may be zero initialized from the OS
|
||||
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan
|
||||
{
|
||||
// zero the segment info? -- not always needed as it may be zero initialized from the OS
|
||||
if (!segment->memid.initially_zero) {
|
||||
ptrdiff_t ofs = offsetof(mi_segment_t, next);
|
||||
size_t prefix = offsetof(mi_segment_t, slices) - ofs;
|
||||
size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more
|
||||
if (!is_zero) {
|
||||
memset((uint8_t*)segment + ofs, 0, zsize);
|
||||
}
|
||||
size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more
|
||||
_mi_memzero((uint8_t*)segment + ofs, zsize);
|
||||
}
|
||||
|
||||
segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed
|
||||
segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large);
|
||||
if (segment->allow_decommit) {
|
||||
segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay);
|
||||
segment->decommit_mask = decommit_mask;
|
||||
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask));
|
||||
#if MI_DEBUG>2
|
||||
const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
|
||||
mi_commit_mask_t commit_needed_mask;
|
||||
mi_commit_mask_create(0, commit_needed, &commit_needed_mask);
|
||||
mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask));
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
segment->decommit_expire = 0;
|
||||
mi_commit_mask_create_empty( &segment->decommit_mask );
|
||||
}
|
||||
|
||||
// initialize segment info
|
||||
// initialize the rest of the segment info
|
||||
const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
|
||||
segment->segment_slices = segment_slices;
|
||||
segment->segment_info_slices = info_slices;
|
||||
|
@ -921,7 +894,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi
|
|||
segment->slice_entries = slice_entries;
|
||||
segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE);
|
||||
|
||||
// memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1));
|
||||
// _mi_memzero(segment->slices, sizeof(mi_slice_t)*(info_slices+1));
|
||||
_mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment));
|
||||
|
||||
// set up guard pages
|
||||
|
@ -948,11 +921,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi
|
|||
// initialize initial free pages
|
||||
if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page
|
||||
mi_assert_internal(huge_page==NULL);
|
||||
mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld);
|
||||
mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't purge */, tld);
|
||||
}
|
||||
else {
|
||||
mi_assert_internal(huge_page!=NULL);
|
||||
mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask));
|
||||
mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask));
|
||||
mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask));
|
||||
*huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld);
|
||||
mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance
|
||||
|
@ -1015,17 +988,16 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
|
|||
_mi_stat_decrease(&tld->stats->pages, 1);
|
||||
|
||||
// reset the page memory to reduce memory pressure?
|
||||
if (!segment->mem_is_pinned && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
|
||||
if (segment->allow_decommit && mi_option_is_enabled(mi_option_deprecated_page_reset)) {
|
||||
size_t psize;
|
||||
uint8_t* start = _mi_page_start(segment, page, &psize);
|
||||
page->is_reset = true;
|
||||
uint8_t* start = _mi_page_start(segment, page, &psize);
|
||||
_mi_os_reset(start, psize, tld->stats);
|
||||
}
|
||||
|
||||
// zero the page data, but not the segment fields
|
||||
page->is_zero_init = false;
|
||||
ptrdiff_t ofs = offsetof(mi_page_t, capacity);
|
||||
memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs);
|
||||
_mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs);
|
||||
page->xblock_size = 1;
|
||||
|
||||
// and free it
|
||||
|
@ -1256,8 +1228,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
|
|||
slice = slice + slice->slice_count;
|
||||
}
|
||||
|
||||
// perform delayed decommits
|
||||
mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats);
|
||||
// perform delayed purges (forcing is much slower on mstress)
|
||||
mi_segment_try_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats);
|
||||
|
||||
// all pages in the segment are abandoned; add it to the abandoned list
|
||||
_mi_stat_increase(&tld->stats->segments_abandoned, 1);
|
||||
|
@ -1365,7 +1337,6 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
|
|||
if (mi_slice_is_used(slice)) {
|
||||
// in use: reclaim the page in our heap
|
||||
mi_page_t* page = mi_slice_to_page(slice);
|
||||
mi_assert_internal(!page->is_reset);
|
||||
mi_assert_internal(page->is_committed);
|
||||
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
|
||||
mi_assert_internal(mi_page_heap(page) == NULL);
|
||||
|
@ -1446,7 +1417,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
|
|||
}
|
||||
else {
|
||||
// otherwise, push on the visited list so it does not get looked at again too quickly
|
||||
mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again
|
||||
mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again
|
||||
mi_abandoned_visited_push(segment);
|
||||
}
|
||||
}
|
||||
|
@ -1470,9 +1441,9 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
|
|||
mi_segment_reclaim(segment, heap, 0, NULL, tld);
|
||||
}
|
||||
else {
|
||||
// otherwise, decommit if needed and push on the visited list
|
||||
// note: forced decommit can be expensive if many threads are destroyed/created as in mstress.
|
||||
mi_segment_delayed_decommit(segment, force, tld->stats);
|
||||
// otherwise, purge if needed and push on the visited list
|
||||
// note: forced purge can be expensive if many threads are destroyed/created as in mstress.
|
||||
mi_segment_try_purge(segment, force, tld->stats);
|
||||
mi_abandoned_visited_push(segment);
|
||||
}
|
||||
}
|
||||
|
@ -1530,7 +1501,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki
|
|||
}
|
||||
mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size);
|
||||
mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id());
|
||||
mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats);
|
||||
mi_segment_try_purge(_mi_ptr_segment(page), false, tld->stats);
|
||||
return page;
|
||||
}
|
||||
|
||||
|
@ -1564,7 +1535,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment,
|
|||
mi_assert_internal(psize - (aligned_p - start) >= size);
|
||||
uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list
|
||||
ptrdiff_t decommit_size = aligned_p - decommit_start;
|
||||
_mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments
|
||||
_mi_os_reset(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments
|
||||
}
|
||||
|
||||
return page;
|
||||
|
@ -1607,9 +1578,12 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc
|
|||
mi_assert_internal(page->used == 1); // this is called just before the free
|
||||
mi_assert_internal(page->free == NULL);
|
||||
if (segment->allow_decommit) {
|
||||
const size_t csize = mi_usable_size(block) - sizeof(mi_block_t);
|
||||
uint8_t* p = (uint8_t*)block + sizeof(mi_block_t);
|
||||
_mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments
|
||||
size_t csize = mi_usable_size(block);
|
||||
if (csize > sizeof(mi_block_t)) {
|
||||
csize = csize - sizeof(mi_block_t);
|
||||
uint8_t* p = (uint8_t*)block + sizeof(mi_block_t);
|
||||
_mi_os_reset(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -32,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#include "page.c" // includes page-queue.c
|
||||
#include "random.c"
|
||||
#include "segment.c"
|
||||
#include "segment-cache.c"
|
||||
#include "segment-map.c"
|
||||
#include "stats.c"
|
||||
#include "prim/prim.c"
|
||||
#if MI_OSX_ZONE
|
||||
|
|
33
src/stats.c
|
@ -96,6 +96,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_add(&stats->reserved, &src->reserved, 1);
mi_stat_add(&stats->committed, &src->committed, 1);
mi_stat_add(&stats->reset, &src->reset, 1);
mi_stat_add(&stats->purged, &src->purged, 1);
mi_stat_add(&stats->page_committed, &src->page_committed, 1);

mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1);

@ -111,6 +112,8 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1);
mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1);
mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1);
mi_stat_counter_add(&stats->reset_calls, &src->reset_calls, 1);
mi_stat_counter_add(&stats->purge_calls, &src->purge_calls, 1);

mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
mi_stat_counter_add(&stats->searches, &src->searches, 1);

@ -143,7 +146,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void*
const int64_t pos = (n < 0 ? -n : n);
if (pos < base) {
if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column
snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix));
snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix));
}
}
else {

@ -158,7 +161,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void*
snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix);
snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc);
}
_mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf);
_mi_fprintf(out, arg, (fmt==NULL ? "%12s" : fmt), buf);
}

@ -167,7 +170,7 @@ static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* a
}

static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) {
if (unit==1) _mi_fprintf(out, arg, "%11s"," ");
if (unit==1) _mi_fprintf(out, arg, "%12s"," ");
else mi_print_amount(n,0,out,arg);
}

@ -182,7 +185,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64
mi_print_count(stat->allocated, unit, out, arg);
if (stat->allocated > stat->freed) {
_mi_fprintf(out, arg, "  ");
_mi_fprintf(out, arg, (notok == NULL ? "not all freed!" : notok));
_mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok));
_mi_fprintf(out, arg, "\n");
}
else {

@ -195,7 +198,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64
mi_print_amount(stat->freed, -1, out, arg);
mi_print_amount(stat->current, -1, out, arg);
if (unit==-1) {
_mi_fprintf(out, arg, "%22s", "");
_mi_fprintf(out, arg, "%24s", "");
}
else {
mi_print_amount(-unit, 1, out, arg);

@ -219,12 +222,19 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t
mi_stat_print_ex(stat, msg, unit, out, arg, NULL);
}

static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) {
_mi_fprintf(out, arg, "%10s:", msg);
mi_print_amount(stat->peak, unit, out, arg);
_mi_fprintf(out, arg, "\n");
}

static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) {
_mi_fprintf(out, arg, "%10s:", msg);
mi_print_amount(stat->total, -1, out, arg);
_mi_fprintf(out, arg, "\n");
}

static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) {
const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count));
const long avg_whole = (long)(avg_tens/10);

@ -234,7 +244,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char*
static void mi_print_header(mi_output_fun* out, void* arg ) {
_mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count ");
_mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count ");
}

#if MI_STAT>1

@ -321,7 +331,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
#endif
mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, "");
mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, "");
mi_stat_print(&stats->reset, "reset", 1, out, arg);
mi_stat_peak_print(&stats->reset, "reset", 1, out, arg );
mi_stat_peak_print(&stats->purged, "purged", 1, out, arg );
mi_stat_print(&stats->page_committed, "touched", 1, out, arg);
mi_stat_print(&stats->segments, "segments", -1, out, arg);
mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg);

@ -332,9 +343,11 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg);
mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg);
mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
mi_stat_counter_print(&stats->reset_calls, "resets", out, arg);
mi_stat_counter_print(&stats->purge_calls, "purges", out, arg);
mi_stat_print(&stats->threads, "threads", -1, out, arg);
mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
_mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count());
_mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count());

size_t elapsed;
size_t user_time;

@ -345,7 +358,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
size_t peak_commit;
size_t page_faults;
mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
_mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000);
_mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000);
_mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process",
user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults );
mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s");

@ -431,7 +444,7 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
{
mi_process_info_t pinfo;
_mi_memzero(&pinfo,sizeof(pinfo));
_mi_memzero_var(pinfo);
pinfo.elapsed = _mi_clock_end(mi_process_start);
pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
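
The exported mi_process_info signature appears in the hunk above. A hedged usage sketch (not part of the diff) that mirrors the call made from the stats printer:

// --- illustrative sketch only, not part of the commit ---
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  size_t elapsed, user_time, sys_time, current_rss, peak_rss, current_commit, peak_commit, page_faults;
  mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss,
                  &current_commit, &peak_commit, &page_faults);
  printf("elapsed: %zu ms, peak rss: %zu bytes, page faults: %zu\n",
         elapsed, peak_rss, page_faults);
  return 0;
}
// --- end sketch ---
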
@ -46,6 +46,14 @@ bool test_heap2(void);
bool test_stl_allocator1(void);
bool test_stl_allocator2(void);

bool mem_is_zero(uint8_t* p, size_t size) {
if (p==NULL) return false;
for (size_t i = 0; i < size; ++i) {
if (p[i] != 0) return false;
}
return true;
}

// ---------------------------------------------------------------------------
// Main testing
// ---------------------------------------------------------------------------

@ -212,6 +220,11 @@ int main(void) {
result = mi_heap_contains_block(heap, p);
mi_heap_destroy(heap);
}
CHECK_BODY("mimalloc-aligned12") {
void* p = mi_malloc_aligned(0x100, 0x100);
result = (((uintptr_t)p % 0x100) == 0); // #602
mi_free(p);
}
CHECK_BODY("malloc-aligned-at1") {
void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p);
};

@ -227,6 +240,21 @@ int main(void) {
}
result = ok;
};
CHECK_BODY("zalloc-aligned-small1") {
size_t zalloc_size = MI_SMALL_SIZE_MAX / 2;
uint8_t* p = (uint8_t*)mi_zalloc_aligned(zalloc_size, MI_MAX_ALIGN_SIZE * 2);
result = mem_is_zero(p, zalloc_size);
mi_free(p);
};
CHECK_BODY("rezalloc_aligned-small1") {
size_t zalloc_size = MI_SMALL_SIZE_MAX / 2;
uint8_t* p = (uint8_t*)mi_zalloc_aligned(zalloc_size, MI_MAX_ALIGN_SIZE * 2);
result = mem_is_zero(p, zalloc_size);
zalloc_size *= 3;
p = (uint8_t*)mi_rezalloc_aligned(p, zalloc_size, MI_MAX_ALIGN_SIZE * 2);
result = result && mem_is_zero(p, zalloc_size);
mi_free(p);
};

// ---------------------------------------------------
// Reallocation

@ -286,7 +314,7 @@ int main(void) {
// Larger test functions
// ---------------------------------------------------

bool test_heap1() {
bool test_heap1(void) {
mi_heap_t* heap = mi_heap_new();
int* p1 = mi_heap_malloc_tp(heap,int);
int* p2 = mi_heap_malloc_tp(heap,int);

@ -295,7 +323,7 @@ bool test_heap1() {
return true;
}

bool test_heap2() {
bool test_heap2(void) {
mi_heap_t* heap = mi_heap_new();
int* p1 = mi_heap_malloc_tp(heap,int);
int* p2 = mi_heap_malloc_tp(heap,int);

@ -306,7 +334,7 @@ bool test_heap2() {
return true;
}

bool test_stl_allocator1() {
bool test_stl_allocator1(void) {
#ifdef __cplusplus
std::vector<int, mi_stl_allocator<int> > vec;
vec.push_back(1);

@ -319,7 +347,7 @@ bool test_stl_allocator1() {
struct some_struct { int i; int j; double z; };

bool test_stl_allocator2() {
bool test_stl_allocator2(void) {
#ifdef __cplusplus
std::vector<some_struct, mi_stl_allocator<some_struct> > vec;
vec.push_back(some_struct());

@ -20,31 +20,37 @@ terms of the MIT license.
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>

// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
//
// argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 25; // scaling factor

#if defined(MI_TSAN)
static int ITER = 10; // N full iterations destructing and re-creating all threads (on tsan reduce for azure pipeline limits)
#else
static int ITER = 50; // N full iterations destructing and re-creating all threads
#endif

// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor

#define STRESS // undefine for leak test

static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?

// #define USE_STD_MALLOC
#ifdef USE_STD_MALLOC
#define custom_calloc(n,s) malloc(n*s)
#define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
#else
#include <mimalloc.h>
#define custom_calloc(n,s) mi_malloc(n*s)
#define custom_calloc(n,s) mi_calloc(n,s)
#define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p)
#endif
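
The custom_calloc macros above now map to calloc/mi_calloc instead of malloc(n*s): calloc zero-initializes the memory (and is expected to reject an overflowing n*s), which the new assert in alloc_items below relies on. A minimal stand-alone illustration (not part of the diff):

// --- illustrative sketch only, not part of the commit ---
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void) {
  const size_t items = 8;
  uintptr_t* p = (uintptr_t*)calloc(items, sizeof(uintptr_t));
  if (p != NULL) {
    for (size_t i = 0; i < items; i++) {
      assert(p[i] == 0);   // guaranteed by calloc; malloc(n*s) gives indeterminate contents
    }
    free(p);
  }
  return 0;
}
// --- end sketch ---
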
@ -101,6 +107,7 @@ static void* alloc_items(size_t items, random_t r) {
uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) {
assert(p[i] == 0);
p[i] = (items - i) ^ cookie;
}
}

@ -222,6 +229,10 @@ static void test_leak(void) {
#endif

int main(int argc, char** argv) {
#ifndef USE_STD_MALLOC
mi_stats_reset();
#endif

// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
if (argc >= 2) {
char* end;

@ -238,15 +249,15 @@ int main(int argc, char** argv) {
long n = (strtol(argv[3], &end, 10));
if (n > 0) ITER = n;
}
printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
if (SCALE > 100) {
allow_large_objects = true;
}
printf("Using %d threads with a %d%% load-per-thread and %d iterations %s\n", THREADS, SCALE, ITER, (allow_large_objects ? "(allow large objects)" : ""));
//mi_reserve_os_memory(1024*1024*1024ULL, false, true);
//int res = mi_reserve_huge_os_pages(4,1);
//printf("(reserve huge: %i\n)", res);

//bench_start_program();
#ifndef USE_STD_MALLOC
mi_stats_reset();
#endif

// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d);