From 1d231be75829e46ab70a79825e19ab102b510523 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 13:05:48 -0700 Subject: [PATCH 1/7] clarify needs_recommit --- include/mimalloc/prim.h | 6 +++--- src/os.c | 24 ++++++++++++------------ src/prim/unix/prim.c | 6 +++--- src/prim/wasi/prim.c | 4 ++-- src/prim/windows/prim.c | 4 ++-- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index c845f437..b900cc95 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -47,11 +47,11 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la // For example, on Linux this would make the memory PROT_READ|PROT_WRITE. int _mi_prim_commit(void* addr, size_t size); -// Decommit memory. Returns error code or 0 on success. The `decommitted` result is true +// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true // if the memory would need to be re-committed. For example, on Windows this is always true, // but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. -// pre: decommitted != NULL -int _mi_prim_decommit(void* addr, size_t size, bool* decommitted); +// pre: needs_recommit != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); // Reset memory. The range keeps being accessible but the content might be reset. // Returns error code or 0 on success. diff --git a/src/os.c b/src/os.c index 88cdd837..78a7b5f7 100644 --- a/src/os.c +++ b/src/os.c @@ -377,10 +377,10 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats return (err == 0); } -static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_stats_t* tld_stats) { +static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - mi_assert_internal(decommitted!=NULL); + mi_assert_internal(needs_recommit!=NULL); _mi_stat_decrease(&stats->committed, size); // page align @@ -389,8 +389,8 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_sta if (csize == 0) return true; // decommit - *decommitted = true; - int err = _mi_prim_decommit(start,csize,decommitted); + *needs_recommit = true; + int err = _mi_prim_decommit(start,csize,needs_recommit); if (err != 0) { _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } @@ -399,8 +399,8 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_sta } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { - bool decommitted = true; - return mi_os_decommit_ex(addr, size, &decommitted, tld_stats); + bool needs_recommit; + return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats); } @@ -427,18 +427,18 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { } -// either resets or decommits memory, returns true if the memory was decommitted -// (in the sense that it needs to be re-committed if the memory is re-used later on). +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { if (!mi_option_is_enabled(mi_option_allow_purge)) return false; if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? 
-      !_mi_preloading())                   // don't decommit during preloading (unsafe)
+      !_mi_preloading())                   // don't decommit during preloading (unsafe)
   {
-    bool decommitted;
-    mi_os_decommit_ex(p, size, &decommitted, stats);
-    return decommitted;
+    bool needs_recommit;
+    mi_os_decommit_ex(p, size, &needs_recommit, stats);
+    return needs_recommit;
   }
   else {
     _mi_os_reset(p, size, stats);
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index d29dcc12..f9aa3b7c 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -352,16 +352,16 @@ int _mi_prim_commit(void* start, size_t size) {
   return err;
 }

-int _mi_prim_decommit(void* start, size_t size, bool* decommitted) {
+int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
   int err = 0;
   #if defined(MADV_DONTNEED) && !MI_DEBUG && !MI_SECURE
   // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
   // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( )
-  *decommitted = false;
+  *needs_recommit = false;
   err = unix_madvise(start, size, MADV_DONTNEED);
   #else
   // decommit: just disable access (also used in debug and secure mode to trap on illegal access)
-  *decommitted = true; // needs recommit to reuse the memory
+  *needs_recommit = true; // needs recommit to reuse the memory
   err = mprotect(start, size, PROT_NONE);
   if (err != 0) { err = errno; }
   #endif
diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c
index e843d99d..57d1c690 100644
--- a/src/prim/wasi/prim.c
+++ b/src/prim/wasi/prim.c
@@ -131,9 +131,9 @@ int _mi_prim_commit(void* addr, size_t size) {
   return 0;
 }

-int _mi_prim_decommit(void* addr, size_t size, bool* decommitted) {
+int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
   MI_UNUSED(addr); MI_UNUSED(size);
-  *decommitted = false;
+  *needs_recommit = false;
   return 0;
 }
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 72f086e3..467a42e9 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -262,9 +262,9 @@ int _mi_prim_commit(void* addr, size_t size) {
   return (p == addr ? 0 : (int)GetLastError());
 }

-int _mi_prim_decommit(void* addr, size_t size, bool* decommitted) {
+int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
   BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
-  *decommitted = true; // for safetly, assume always decommitted even in the case of an error.
+  *needs_recommit = true; // for safety, assume the memory always needs recommit, even in the case of an error.
   return (ok ?
0 : (int)GetLastError()); } From 77766e20a637117fb19f726be6f040492af75e97 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 14:58:06 -0700 Subject: [PATCH 2/7] use only arena's instead of regions --- CMakeLists.txt | 1 - ide/vs2022/mimalloc-override.vcxproj | 1 - ide/vs2022/mimalloc.vcxproj | 1 - include/mimalloc/internal.h | 3 + src/arena.c | 36 +- src/heap.c | 1 - src/init.c | 1 - src/options.c | 46 ++- src/region.c | 502 --------------------------- src/segment.c | 39 ++- src/static.c | 1 - 11 files changed, 81 insertions(+), 551 deletions(-) delete mode 100644 src/region.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 9550f77f..a9e098c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,6 @@ set(mi_sources src/os.c src/page.c src/random.c - src/region.c src/segment.c src/stats.c src/prim/prim.c) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 50a3d6b9..81a3fc70 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -247,7 +247,6 @@ true true - diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 9a7bf18c..d7e147b8 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -226,7 +226,6 @@ true true - true diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5904198f..b9fe5453 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -119,7 +119,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_o bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id); bool _mi_arena_is_os_allocated(size_t arena_memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); +bool _mi_arena_contains(const void* p); +/* // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); @@ -132,6 +134,7 @@ bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); void _mi_mem_collect(mi_os_tld_t* tld); +*/ // "segment.c" mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); diff --git a/src/arena.c b/src/arena.c index 64cb1624..724fbaf4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,6 +128,10 @@ static size_t mi_block_count_of_size(size_t size) { return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } +static size_t mi_arena_block_size(size_t bcount) { + return (bcount * MI_ARENA_BLOCK_SIZE); +} + /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ @@ -158,7 +162,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! 
- void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); + void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); *large = arena->is_large; *is_pinned = (arena->is_large || !arena->allow_decommit); @@ -183,7 +187,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { bool commit_zero; - _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); + _mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats); if (commit_zero) { *is_zero = true; } } } @@ -192,7 +196,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } - // mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); + // mi_track_mem_undefined(p,mi_arena_block_size(needed_bcount)); return p; } @@ -207,7 +211,7 @@ static void* mi_arena_alloc_in(mi_arena_id_t arena_id, int numa_node, size_t siz const size_t bcount = mi_block_count_of_size(size); const size_t arena_index = mi_arena_id_index(arena_id); mi_assert_internal(arena_index < max_arena); - mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); + mi_assert_internal(size <= mi_arena_block_size(bcount)); if (arena_index >= max_arena) return NULL; mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); @@ -228,7 +232,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); const size_t bcount = mi_block_count_of_size(size); if mi_likely(max_arena == 0) return NULL; - mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); + mi_assert_internal(size <= mi_arena_block_size(bcount)); size_t arena_index = mi_arena_id_index(req_arena_id); if (arena_index < MI_MAX_ARENAS) { @@ -335,7 +339,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (arena_index >= MI_MAX_ARENAS) return NULL; mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; - if (size != NULL) *size = arena->block_count * MI_ARENA_BLOCK_SIZE; + if (size != NULL) { *size = mi_arena_block_size(arena->block_count); } return arena->start; } @@ -348,8 +352,8 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); mi_assert_internal(arena->allow_decommit); - const size_t size = blocks * MI_ARENA_BLOCK_SIZE; - void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); + const size_t size = mi_arena_block_size(blocks); + void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx)); const bool decommitted = _mi_os_purge(p, size, stats); // clear the purged blocks _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); @@ -557,6 +561,22 @@ void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats) mi_arenas_try_purge(force_decommit, true, stats); } + +bool _mi_arena_contains(const void* p) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* 
arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+    if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
+      return true;
+    }
+  }
+  return false;
+}
+
+mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+  return _mi_arena_contains(p); // todo: extend to track os allocated memory as well
+}
+
 /* -----------------------------------------------------------
   Add an arena.
 ----------------------------------------------------------- */
diff --git a/src/heap.c b/src/heap.c
index 31a8b660..99316bb8 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -158,7 +158,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   // collect regions on program-exit (or shared library unload)
   if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
-    _mi_mem_collect(&heap->tld->os);
     _mi_arena_collect(false,true,&heap->tld->stats);
   }
 }
diff --git a/src/init.c b/src/init.c
index 61245cd1..8c79561f 100644
--- a/src/init.c
+++ b/src/init.c
@@ -590,7 +590,6 @@ static void mi_cdecl mi_process_done(void) {
   // or C-runtime termination code.
   if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
     _mi_heap_destroy_all();  // forcefully release all memory held by all heaps (of this thread only!)
-    _mi_mem_collect(&_mi_heap_main_get()->tld->os);  // release all regions
     _mi_arena_collect(true,true,&_mi_heap_main_get()->tld->stats);
   }
diff --git a/src/options.c b/src/options.c
index 79e3560e..5d4af7ba 100644
--- a/src/options.c
+++ b/src/options.c
@@ -41,10 +41,11 @@ typedef struct mi_option_desc_s {
   mi_init_t   init;   // is it initialized yet? (from the environment)
   mi_option_t option; // for debugging: the option index should match the option
   const char* name;   // option name without `mimalloc_` prefix
+  const char* legacy_name; // potential legacy option name
 } mi_option_desc_t;

-#define MI_OPTION(opt)        mi_option_##opt, #opt
-#define MI_OPTION_DESC(opt)   {0, UNINIT, MI_OPTION(opt) }
+#define MI_OPTION(opt)                  mi_option_##opt, #opt, NULL
+#define MI_OPTION_LEGACY(opt,legacy)    mi_option_##opt, #opt, #legacy

 static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(verbose) },

   // the following options are experimental and not all combinations make sense.
-  { 1, UNINIT, MI_OPTION(eager_commit) },        // commit per segment directly (4MiB)  (but see also `eager_commit_delay`)
-  #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4)   // and other OS's without overcommit?
- { 0, UNINIT, MI_OPTION(eager_region_commit) }, - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory - #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED - #endif + { 1, UNINIT, MI_OPTION_LEGACY(segment_eager_commit,eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N @@ -87,7 +83,12 @@ static mi_option_desc_t options[_mi_option_last] = { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! - { 0, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (disable for now in v1.x due to regions) + #if (MI_INTPTR_SIZE>4) + { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time + #else + { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, + #endif + { 500, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation { 1, UNINIT, MI_OPTION(allow_purge) } // allow decommit/reset to free (physical) memory back to the OS }; @@ -504,18 +505,27 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment + char s[64 + 1]; char buf[64+1]; _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); _mi_strlcat(buf, desc->name, sizeof(buf)); - char s[64+1]; - if (mi_getenv(buf, s, sizeof(s))) { - size_t len = _mi_strnlen(s,64); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; + bool found = mi_getenv(buf, s, sizeof(s)); + if (!found && desc->legacy_name != NULL) { + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + found = mi_getenv(buf, s, sizeof(s)); + if (found) { + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name); + } + } + + if (found) { + size_t len = _mi_strnlen(s, sizeof(buf) - 1); for (size_t i = 0; i < len; i++) { buf[i] = _mi_toupper(s[i]); } buf[len] = 0; - if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { + if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) { desc->value = 1; desc->init = INITIALIZED; } @@ -546,11 +556,11 @@ static void mi_option_init(mi_option_desc_t* desc) { // if the 'mimalloc_verbose' env var has a bogus value we'd never know // (since the value defaults to 'off') so in that case briefly enable verbose desc->value = 1; - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); desc->value = 0; } else { - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + _mi_warning_message("environment option 
mimalloc_%s has an invalid value.\n", desc->name); } } } diff --git a/src/region.c b/src/region.c deleted file mode 100644 index 36226eff..00000000 --- a/src/region.c +++ /dev/null @@ -1,502 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper - way to reuse memory. -3. This layer allows for NUMA aware allocation. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "mimalloc/atomic.h" - -#include // memset - -#include "bitmap.h" - -// os.c -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) -#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) - -// Region info -typedef union mi_region_info_u { - size_t value; - struct { - bool valid; // initialized? 
- bool is_large:1; // allocated in fixed large/huge OS pages - bool is_pinned:1; // pinned memory cannot be decommitted - short numa_node; // the associated NUMA node (where -1 means no associated node) - } x; -} mi_region_info_t; - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - _Atomic(size_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - _Atomic(size_t) arena_memid; // if allocated from a (huge page) arena - _Atomic(size_t) padding; // round to 8 fields (needs to be atomic for msvc, see issue #508) -} mem_region_t; - -// The region map -static mem_region_t regions[MI_REGION_MAX]; - -// Allocated regions -static _Atomic(size_t) regions_count; // = 0; - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - return _mi_divide_up(size, MI_SEGMENT_SIZE); -} - -/* -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} -*/ - -// Return if a pointer points into a region reserved by us. -mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); - mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); -} - -static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { - mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); - size_t idx = region - regions; - mi_assert_internal(®ions[idx] == region); - return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; -} - -static size_t mi_memid_create_from_arena(size_t arena_memid) { - return (arena_memid << 1) | 1; -} - - -static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if ((id&1)==1) { - if (arena_memid != NULL) *arena_memid = (id>>1); - return true; - } - else { - size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; - *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; - *region = ®ions[idx]; - return false; - } -} - - -/* ---------------------------------------------------------------------------- - Allocate a region is allocated from the OS (or an arena) ------------------------------------------------------------------------------*/ - -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // not 
out of regions yet? - if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - - // try to allocate a fresh region from the OS - bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; - bool is_pinned = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); - if (start == NULL) return false; - mi_assert_internal(!(region_large && !allow_large)); - mi_assert_internal(!region_large || region_commit); - - // claim a fresh slot - const size_t idx = mi_atomic_increment_acq_rel(®ions_count); - if (idx >= MI_REGION_MAX) { - mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats); - _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); - return false; - } - - // allocated, initialize and claim the initial blocks - mem_region_t* r = ®ions[idx]; - r->arena_memid = arena_memid; - mi_atomic_store_release(&r->in_use, (size_t)0); - mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_store_release(&r->reset, (size_t)0); - *bit_idx = 0; - _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_store_ptr_release(void,&r->start, start); - - // and share it - mi_region_info_t info; - info.value = 0; // initialize the full union to zero - info.x.valid = true; - info.x.is_large = region_large; - info.x.is_pinned = is_pinned; - info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_store_release(&r->info, info.value); // now make it available to others - *region = r; - return true; -} - -/* ---------------------------------------------------------------------------- - Try to claim blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { - // initialized at all? - mi_region_info_t info; - info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); - if (info.value==0) return false; - - // numa correct - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.x.numa_node; - if (rnode >= 0 && rnode != numa_node) return false; - } - - // check allow-large - if (!allow_large && info.x.is_large) return false; - - return true; -} - - -static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // try all regions for a free slot - const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - mem_region_t* r = ®ions[idx]; - // if this region suits our demand (numa node matches, large OS page matches) - if (mi_region_is_suitable(r, numa_node, allow_large)) { - // then try to atomically claim a segment(s) in this region - if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { - tld->region_idx = idx; // remember the last found position - *region = r; - return true; - } - } - } - return false; -} - - -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); - mem_region_t* region; - mi_bitmap_index_t bit_idx; - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) { - // out of regions or memory - return NULL; - } - } - - // ------------------------------------------------ - // found a region and claimed `blocks` at `bit_idx`, initialize them now - mi_assert_internal(region != NULL); - mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); - mi_assert_internal(!(info.x.is_large && !*large)); - mi_assert_internal(start != NULL); - - *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *large = info.x.is_large; - *is_pinned = info.x.is_pinned; - *memid = mi_memid_create(region, bit_idx); - void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - - // commit - if (*commit) { - // ensure commit - bool any_uncommitted; - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); - if (any_uncommitted) { - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - bool commit_zero = false; - if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { - // failed to commit! 
unclaim and return - _mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - return NULL; - } - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); - } - mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); - - // unreset reset blocks - if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { - // some blocks are still reset - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - _mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; - } - } - mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - - // and return the allocation - mi_assert_internal(p != NULL); - return p; -} - - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. -// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = 0; - *is_zero = false; - *is_pinned = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - if (size == 0) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // allocate from regions if possible - void* p = NULL; - size_t arena_memid; - const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); - if (p == NULL) { - _mi_warning_message("unable to allocate from region: size %zu\n", size); - } - } - if (p == NULL) { - // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - } - - if (p != NULL) { - mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN - if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed - #endif - } - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. 
-void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { - mi_assert_internal(size > 0 && tld != NULL); - if (p==NULL) return; - if (size==0) return; - size = _mi_align_up(size, _mi_os_page_size()); - - size_t arena_memid = 0; - mi_bitmap_index_t bit_idx; - mem_region_t* region; - if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { - // was a direct arena allocation, pass through - _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats); - } - else { - // allocated in a region - mi_assert_internal(align_offset == 0); - mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - mi_assert_internal(info.value != 0); - void* blocks_start = mi_region_blocks_start(region, bit_idx); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - - // committed? - if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); - } - - if (any_reset) { - // set the is_reset bits if any pages were reset - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); - } - - // reset the blocks to reduce the working set. - if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) - && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead - { - bool any_unreset; - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); - if (any_unreset) { - _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) - _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); - } - } - - // and unclaim - bool all_unclaimed = _mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_os_tld_t* tld) { - // free every region that has no segments in use. 
- size_t rcount = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < rcount; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_load_relaxed(®ion->info) != 0) { - // if no segments used, try to claim the whole region - size_t m = mi_atomic_load_relaxed(®ion->in_use); - while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; - if (m == 0) { - // on success, free the whole region - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); - size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); - size_t commit = mi_atomic_load_relaxed(®ions[i].commit); - memset((void*)®ions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning - // and release the whole region - mi_atomic_store_release(®ion->info, (size_t)0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats); - } - } - } - } -} - - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(p, size, tld->stats); - } - else { - return _mi_os_reset(p, size, tld->stats); - } -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit(p, size, is_zero, tld->stats); - } - else { - // return _mi_os_unreset(p, size, is_zero, tld->stats); - return true; - } -} - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_commit(p, size, is_zero, tld->stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_decommit(p, size, tld->stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} diff --git a/src/segment.c b/src/segment.c index 56b7a06c..af325fe7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -177,10 +177,10 @@ static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) static void mi_segment_protect_range(void* p, size_t size, bool protect) { if (protect) { - _mi_mem_protect(p, size); + _mi_os_protect(p, size); } else { - _mi_mem_unprotect(p, size); + _mi_os_unprotect(p, size); } } @@ -202,7 +202,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* if (protect && !segment->mem_is_committed) { if (protect) { // ensure secure page is committed - if (_mi_mem_commit(start, os_psize, NULL, tld)) { // if this fails that is ok (as it is an unaccessible page) + if (_mi_os_commit(start, os_psize, NULL, tld->stats)) { // if this fails that is ok (as it is an unaccessible page) mi_segment_protect_range(start, os_psize, protect); } } @@ -238,26 +238,29 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m page->is_reset = true; mi_assert_internal(size <= psize); size_t reset_size = ((size == 0 || size > psize) ? 
psize : size); - if (reset_size > 0) _mi_mem_reset(start, reset_size, tld->os); + if (reset_size > 0) { _mi_os_reset(start, reset_size, tld->stats); } } static bool mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + MI_UNUSED(size); MI_UNUSED(tld); mi_assert_internal(page->is_reset); mi_assert_internal(page->is_committed); mi_assert_internal(!segment->mem_is_pinned); if (segment->mem_is_pinned || !page->is_committed || !page->is_reset) return true; page->is_reset = false; + /* size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); size_t unreset_size = (size == 0 || size > psize ? psize : size); - bool is_zero = false; - bool ok = true; - if (unreset_size > 0) { - ok = _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); - } - if (is_zero) page->is_zero_init = true; - return ok; + */ + // bool is_zero = false; + // bool ok = true; + // if (unreset_size > 0) { + // ok = _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); + // } + // if (is_zero) page->is_zero_init = true; + return true; } @@ -477,7 +480,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false; } - _mi_mem_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, fully_committed, any_reset, tld->os); + + _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, fully_committed, tld->stats); } // called by threads that are terminating to free cached segments @@ -510,17 +514,18 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme *segment_size = *segment_size + (align_offset - pre_size); } - mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); + // mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); + mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, _mi_arena_id_none(), &memid, tld_os); if (segment == NULL) return NULL; // failed to allocate if (!(*commit)) { // ensure the initial info is committed mi_assert_internal(!mem_large && !is_pinned); bool commit_zero = false; - bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld_os); - if (commit_zero) *is_zero = true; + bool ok = _mi_os_commit(segment, pre_size, &commit_zero, tld_os->stats); + if (commit_zero) { *is_zero = true; } if (!ok) { // commit failed; we cannot touch the memory: free the segment directly and return `NULL` - _mi_mem_free(segment, *segment_size, alignment, align_offset, memid, false, false, tld_os); + _mi_arena_free(segment, *segment_size, alignment, align_offset, memid, false, tld_os->stats); return NULL; } } @@ -651,7 +656,7 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); - bool ok = _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + bool ok = _mi_os_commit(start, psize + gsize, &is_zero, tld->stats); if (!ok) return false; // failed to commit! 
if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } if (is_zero) { page->is_zero_init = true; } diff --git a/src/static.c b/src/static.c index 090f0c25..483b1b41 100644 --- a/src/static.c +++ b/src/static.c @@ -31,7 +31,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "os.c" #include "page.c" // includes page-queue.c #include "random.c" -#include "region.c" #include "segment.c" #include "stats.c" #include "prim/prim.c" From 4fc597d4f4680be8e12dc8900400ec5c07983781 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 15:05:29 -0700 Subject: [PATCH 3/7] more tight purge delay --- src/arena.c | 7 ++++--- src/options.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index 724fbaf4..e711c949 100644 --- a/src/arena.c +++ b/src/arena.c @@ -305,9 +305,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset { mi_arena_id_t arena_id = 0; - bool arena_commit = _mi_os_has_overcommit(); - if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } - else if (mi_option_get(mi_option_arena_eager_commit) == 0) { arena_commit = false; } + // commit eagerly? + bool arena_commit = false; + if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } + else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); diff --git a/src/options.c b/src/options.c index 5d4af7ba..580e1887 100644 --- a/src/options.c +++ b/src/options.c @@ -75,7 +75,7 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 10, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 500, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation + { 100, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation { 1, UNINIT, MI_OPTION(allow_purge) } // allow decommit/reset to free (physical) memory back to the OS }; From 461df1e8788a26c76f03307941dce87c9b143ccf Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 15:40:06 -0700 Subject: [PATCH 4/7] implement arena exclusive heap allocation for dev --- include/mimalloc.h | 2 +- include/mimalloc/internal.h | 1 + include/mimalloc/types.h | 1 + src/heap.c | 15 ++++++++-- src/init.c | 2 ++ src/segment.c | 55 +++++++++++++++++++++++++------------ 6 files changed, 54 insertions(+), 22 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 032faa4b..f229270c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -281,7 +281,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -#if MI_MALLOC_VERSION >= 200 +#if MI_MALLOC_VERSION >= 182 // Create a heap that only allocates in the specified arena mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b9fe5453..6e98be41 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -179,6 +179,7 @@ void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); void _mi_heap_destroy_all(void); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 28343d21..434f9f67 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -410,6 +410,7 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") _Atomic(mi_block_t*) thread_delayed_free; mi_threadid_t thread_id; // thread this heap belongs too + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation diff --git a/src/heap.c b/src/heap.c index 99316bb8..08b27f3d 100644 --- a/src/heap.c +++ b/src/heap.c @@ -198,15 +198,16 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
- if (heap==NULL) return NULL; + if (heap == NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); + heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; + heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe @@ -216,6 +217,14 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { return heap; } +mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { + return mi_heap_new_in_arena(_mi_arena_id_none()); +} + +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { + return _mi_arena_memid_is_suitable(memid, heap->arena_id); +} + uintptr_t _mi_heap_random_next(mi_heap_t* heap) { return _mi_random_next(&heap->random); } diff --git a/src/init.c b/src/init.c index 8c79561f..b105548e 100644 --- a/src/init.c +++ b/src/init.c @@ -96,6 +96,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie + 0, // arena id { 0, 0 }, // keys { {0}, {0}, 0, true }, // random 0, // page count @@ -132,6 +133,7 @@ mi_heap_t _mi_heap_main = { MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie + 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0x846ca68b}, {0}, 0, true }, // random 0, // page count diff --git a/src/segment.c b/src/segment.c index af325fe7..458980cd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -54,9 +54,11 @@ static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_ } #endif +/* static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) { return (queue->first == NULL); } +*/ static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_expensive(mi_segment_queue_contains(queue, segment)); @@ -500,7 +502,8 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Segment allocation ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, size_t pre_size, size_t info_size, +static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, mi_arena_id_t req_arena_id, + size_t pre_size, size_t info_size, size_t* segment_size, bool* is_zero, bool* commit, mi_segments_tld_t* tld, mi_os_tld_t* tld_os) { size_t memid; @@ -515,7 +518,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme } // mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); - mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, _mi_arena_id_none(), &memid, tld_os); + mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, tld_os); if (segment == NULL) return NULL; // failed to allocate if (!(*commit)) { // ensure the initial info is committed @@ -541,7 +544,8 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme } // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
-static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, + mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // required is only > 0 for huge page allocations mi_assert_internal((required > 0 && page_kind > MI_PAGE_LARGE)|| (required==0 && page_kind <= MI_PAGE_LARGE)); @@ -574,7 +578,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool is_zero = false; // Allocate the segment from the OS (segment_size can change due to alignment) - mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, pre_size, info_size, &segment_size, &is_zero, &commit, tld, os_tld); + mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, &segment_size, &is_zero, &commit, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment->mem_is_pinned ? segment->mem_is_committed : true); @@ -1094,6 +1098,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; + // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments + // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? + bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); bool all_pages_free; bool has_page = mi_segment_check_free(segment,block_size,&all_pages_free); // try to free up pages (due to concurrent frees) if (all_pages_free) { @@ -1104,18 +1111,19 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (has_page && segment->page_kind == page_kind) { + else if (has_page && segment->page_kind == page_kind && is_suitable) { // found a free page of the right kind, or page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } - else if (segment->abandoned_visits >= 3) { + else if (segment->abandoned_visits >= 3 && is_suitable) { // always reclaim on 3rd visit to limit the list length. mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { // otherwise, push on the visited list so it gets not looked at too quickly again + // todo: reset delayed pages in the segment? mi_abandoned_visited_push(segment); } } @@ -1135,6 +1143,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s // 1. 
try to reclaim an abandoned segment
   bool reclaimed;
   mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld);
+  mi_assert_internal(segment == NULL || _mi_arena_memid_is_suitable(segment->memid, heap->arena_id));
   if (reclaimed) {
     // reclaimed the right page right into the heap
     mi_assert_internal(segment != NULL && segment->page_kind == page_kind && page_kind <= MI_PAGE_LARGE);
@@ -1145,7 +1154,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s
     return segment;
   }
   // 2. otherwise allocate a fresh segment
-  return mi_segment_alloc(0, page_kind, page_shift, 0, tld, os_tld);
+  return mi_segment_alloc(0, page_kind, page_shift, 0, heap->arena_id, tld, os_tld);
 }
@@ -1155,7 +1164,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s
 static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(mi_segment_has_free(segment));
-  mi_assert_expensive(mi_segment_is_valid(segment, tld));
+  mi_assert_expensive(mi_segment_is_valid(segment, tld));
   for (size_t i = 0; i < segment->capacity; i++) {  // TODO: use a bitmap instead of search?
     mi_page_t* page = &segment->pages[i];
     if (!page->segment_in_use) {
@@ -1173,24 +1182,34 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl
   return mi_segment_find_free(segment, tld);
 }

-static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
+static mi_page_t* mi_segment_page_try_alloc_in_queue(mi_heap_t* heap, mi_page_kind_t kind, mi_segments_tld_t* tld) {
   // find an available segment the segment free queue
   mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld);
-  if (mi_segment_queue_is_empty(free_queue)) {
+  for (mi_segment_t* segment = free_queue->first; segment != NULL; segment = segment->next) {
+    if (_mi_arena_memid_is_suitable(segment->memid, heap->arena_id) && mi_segment_has_free(segment)) {
+      return mi_segment_page_alloc_in(segment, tld);
+    }
+  }
+  return NULL;
+}
+
+static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
+  mi_page_t* page = mi_segment_page_try_alloc_in_queue(heap, kind, tld);
+  if (page == NULL) {
     // possibly allocate or reclaim a fresh segment
     mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld);
     if (segment == NULL) return NULL;  // return NULL if out-of-memory (or reclaimed)
-    mi_assert_internal(free_queue->first == segment);
     mi_assert_internal(segment->page_kind==kind);
     mi_assert_internal(segment->used < segment->capacity);
+    mi_assert_internal(_mi_arena_memid_is_suitable(segment->memid, heap->arena_id));
+    page = mi_segment_page_try_alloc_in_queue(heap, kind, tld);  // this should now succeed
   }
-  mi_assert_internal(free_queue->first != NULL);
-  mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld);
   mi_assert_internal(page != NULL);
-#if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN
-  // verify it is committed
-  _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0;
-#endif
+  #if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN
+  // verify it is committed
+  _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0;
+  #endif
   return page;
 }

@@ -1217,9 +1236,9 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size
   return page;
 }

-static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t
page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, tld, os_tld); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, req_arena_id, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); #if MI_HUGE_PAGE_ABANDON @@ -1303,7 +1322,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0); if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; } - page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld, os_tld); } else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); @@ -1315,7 +1334,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld, os_tld); } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); From a2e1d2b89321f16adc0cdb3222d6ff88ef4d7eb4 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 16:00:17 -0700 Subject: [PATCH 5/7] add segment map to track valid pointers --- CMakeLists.txt | 1 + include/mimalloc/internal.h | 21 ++--- src/arena.c | 3 - src/segment-map.c | 153 ++++++++++++++++++++++++++++++++++++ src/segment.c | 7 +- src/static.c | 1 + 6 files changed, 166 insertions(+), 20 deletions(-) create mode 100644 src/segment-map.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a9e098c7..2bcd1ef7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ set(mi_sources src/page.c src/random.c src/segment.c + src/segment-map.c src/stats.c src/prim/prim.c) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 6e98be41..c776d985 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -121,20 +121,9 @@ bool _mi_arena_is_os_allocated(size_t arena_memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); -/* -// memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld); -bool _mi_mem_protect(void* addr, size_t size); -bool _mi_mem_unprotect(void* addr, size_t size); - -void _mi_mem_collect(mi_os_tld_t* tld); -*/ +// "segment-map.c" +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -460,6 +449,10 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } +// size of a segment +static inline size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_size; +} // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { diff --git a/src/arena.c b/src/arena.c index e711c949..14dd8b4d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -574,9 +574,6 @@ bool _mi_arena_contains(const void* p) { return false; } -mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return _mi_arena_contains(p); // todo: extend to track os allocated memory as well -} /* ----------------------------------------------------------- Add an arena. diff --git a/src/segment-map.c b/src/segment-map.c new file mode 100644 index 00000000..56b18531 --- /dev/null +++ b/src/segment-map.c @@ -0,0 +1,153 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/
+
+/* -----------------------------------------------------------
+  The following functions are to reliably find the segment or
+  block that encompasses any pointer p (or NULL if it is not
+  in any of our segments).
+  We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB),
+  set to 1 if it contains the segment metadata.
+----------------------------------------------------------- */
+#include "mimalloc.h"
+#include "mimalloc/internal.h"
+#include "mimalloc/atomic.h"
+
+#if (MI_INTPTR_SIZE==8)
+#define MI_MAX_ADDRESS    ((size_t)40 << 40)  // 40TB (to include huge page areas)
+#else
+#define MI_MAX_ADDRESS    ((size_t)2 << 30)   // 2GiB
+#endif
+
+#define MI_SEGMENT_MAP_BITS  (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
+#define MI_SEGMENT_MAP_SIZE  (MI_SEGMENT_MAP_BITS / 8)
+#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
+
+static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1];  // 2KiB per TB with 64MiB segments
+
+static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
+  mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
+  if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
+    *bitidx = 0;
+    return MI_SEGMENT_MAP_WSIZE;
+  }
+  else {
+    const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
+    *bitidx = segindex % MI_INTPTR_BITS;
+    const size_t mapindex = segindex / MI_INTPTR_BITS;
+    mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
+    return mapindex;
+  }
+}
+
+void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
+  if (index==MI_SEGMENT_MAP_WSIZE) return;
+  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  uintptr_t newmask;
+  do {
+    newmask = (mask | ((uintptr_t)1 << bitidx));
+  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
+}
+
+void _mi_segment_map_freed_at(const mi_segment_t* segment) {
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
+  if (index == MI_SEGMENT_MAP_WSIZE) return;
+  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  uintptr_t newmask;
+  do {
+    newmask = (mask & ~((uintptr_t)1 << bitidx));
+  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
+}
+
+// Determine the segment belonging to a pointer, or NULL if it is not in a valid segment.
+static mi_segment_t* _mi_segment_of(const void* p) {
+  if (p == NULL) return NULL;
+  mi_segment_t* segment = _mi_ptr_segment(p);
+  mi_assert_internal(segment != NULL);
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  // fast path: for any pointer to a valid small/medium/large object, or the first MI_SEGMENT_SIZE in huge
+  const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+  if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
+    return segment; // yes, allocated by us
+  }
+  if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
+
+  // TODO: maintain a max/min allocated range for more efficient rejection of invalid pointers?
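+  // Illustrative worked example (a sketch; assumes 64-bit words and 64MiB
+  // segments, so MI_INTPTR_BITS == 64): a segment at address 0x100000000
+  // (4GiB) has segindex = 0x100000000 / MI_SEGMENT_SIZE = 64, giving
+  // mapindex = 64/64 = 1 and bitidx = 64%64 = 0; the fast path above
+  // therefore tests bit 0 of mi_segment_map[1].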
+
+  // search downwards for the first segment in case it is an interior pointer
+  // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
+  // valid huge objects
+  // note: we could maintain a lowest index to speed up the path for invalid pointers?
+  size_t lobitidx;
+  size_t loindex;
+  uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
+  if (lobits != 0) {
+    loindex = index;
+    lobitidx = mi_bsr(lobits);    // lobits != 0
+  }
+  else if (index == 0) {
+    return NULL;
+  }
+  else {
+    mi_assert_internal(index > 0);
+    uintptr_t lomask = mask;
+    loindex = index;
+    do {
+      loindex--;
+      lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
+    } while (lomask != 0 && loindex > 0);
+    if (lomask == 0) return NULL;
+    lobitidx = mi_bsr(lomask);    // lomask != 0
+  }
+  mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
+  // take the difference as the addresses could be larger than the MAX_ADDRESS space.
+  size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
+  segment = (mi_segment_t*)((uint8_t*)segment - diff);
+
+  if (segment == NULL) return NULL;
+  mi_assert_internal((void*)segment < p);
+  bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
+  mi_assert_internal(cookie_ok);
+  if mi_unlikely(!cookie_ok) return NULL;
+  if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
+  mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
+  return segment;
+}
+
+// Is this a valid pointer in our heap?
+static bool mi_is_valid_pointer(const void* p) {
+  return (_mi_segment_of(p) != NULL);
+}
+
+mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+  return mi_is_valid_pointer(p);
+}
+
+/*
+// Return the full segment range belonging to a pointer
+static void* mi_segment_range_of(const void* p, size_t* size) {
+  mi_segment_t* segment = _mi_segment_of(p);
+  if (segment == NULL) {
+    if (size != NULL) *size = 0;
+    return NULL;
+  }
+  else {
+    if (size != NULL) *size = segment->segment_size;
+    return segment;
+  }
+  mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
+  mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
+  mi_reset_delayed(tld);
+  mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
+  return page;
+}
+*/

diff --git a/src/segment.c b/src/segment.c
index 458980cd..e34cb2bf 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -424,7 +424,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
   return p;
 }
 
-static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size)
+static size_t mi_segment_calculate_sizes(size_t capacity, size_t required, size_t* pre_size, size_t* info_size)
 {
   const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */;
   size_t guardsize = 0;
@@ -466,6 +466,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
 
 static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
   segment->thread_id = 0;
+  _mi_segment_map_freed_at(segment);
   mi_segments_track_size(-((long)segment_size),tld);
   if (MI_SECURE != 0) {
     mi_assert_internal(!segment->mem_is_pinned);
@@ -540,6 +541,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme
   segment->mem_alignment = alignment;
   segment->mem_align_offset = align_offset;
   mi_segments_track_size((long)(*segment_size), tld);
+  _mi_segment_map_allocated_at(segment);
   return segment;
 }
 
@@ -565,7 +567,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
   }
   size_t info_size;
   size_t pre_size;
-  size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size);
+  size_t segment_size = mi_segment_calculate_sizes(capacity, required, &pre_size, &info_size);
   mi_assert_internal(segment_size >= required);
 
   // Initialize parameters
@@ -1199,7 +1201,6 @@ static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_p
     // possibly allocate or reclaim a fresh segment
     mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld);
     if (segment == NULL) return NULL;  // return NULL if out-of-memory (or reclaimed)
-    mi_assert_internal(free_queue->first == segment);
     mi_assert_internal(segment->page_kind==kind);
     mi_assert_internal(segment->used < segment->capacity);
     mi_assert_internal(_mi_arena_memid_is_suitable(segment->memid, heap->arena_id));
diff --git a/src/static.c b/src/static.c
index 483b1b41..bc05dd72 100644
--- a/src/static.c
+++ b/src/static.c
@@ -32,6 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "page.c" // includes page-queue.c
 #include "random.c"
 #include "segment.c"
+#include "segment-map.c"
 #include "stats.c"
 #include "prim/prim.c"
 #if MI_OSX_ZONE

From db74fc0c986fb3a9e88bf8d439adb49db5ea2e95 Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 4 Apr 2023 16:04:17 -0700
Subject: [PATCH 6/7] add abandoned reader barrier

---
 src/segment.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/segment.c b/src/segment.c
index e34cb2bf..0eec0727 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -484,6 +484,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
     fully_committed = false;
   }
 
+  _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged)
   _mi_arena_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, fully_committed, tld->stats);
 }
 

From d01017ffdae6e25200583eaec3d0803b499c024d Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 4 Apr 2023 16:32:39 -0700
Subject: [PATCH 7/7] extend primitive api with is_zero parameters

---
 ide/vs2022/mimalloc-override.vcxproj |  1 +
 ide/vs2022/mimalloc.vcxproj          |  1 +
 include/mimalloc/internal.h          | 12 +++---
 include/mimalloc/prim.h              |  9 +++--
 src/arena.c                          | 17 +++++----
 src/init.c                           |  4 +-
 src/os.c                             | 56 +++++++++++++++-------------
 src/prim/unix/prim.c                 | 11 ++++--
 src/prim/wasi/prim.c                 |  6 ++-
 src/prim/windows/prim.c              |  6 ++-
 10 files changed, 71 insertions(+), 52 deletions(-)

diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj
index 81a3fc70..e2c7f71d 100644
--- a/ide/vs2022/mimalloc-override.vcxproj
+++ b/ide/vs2022/mimalloc-override.vcxproj
@@ -257,6 +257,7 @@
+    <ClCompile Include="..\..\src\segment-map.c" />
 
diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj
index d7e147b8..77a1711b 100644
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc.vcxproj
@@ -235,6 +235,7 @@
+    <ClCompile Include="..\..\src\segment-map.c" />
 
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index c776d985..9b73c92c 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -86,9 +86,9 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea
 void _mi_thread_done(mi_heap_t* heap);
 
 // os.c
-void  _mi_os_init(void);                                     // called from process init
-void* _mi_os_alloc(size_t size, mi_stats_t* stats);          // to allocate thread local data
-void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);  // to free thread local data
+void  _mi_os_init(void);                                           // called from process init
+void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats); // to allocate thread local data
+void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);        // to free thread local data
 size_t _mi_os_page_size(void);
 size_t _mi_os_good_alloc_size(size_t size);
 bool  _mi_os_has_overcommit(void);
@@ -100,15 +100,15 @@ bool _mi_os_protect(void* addr, size_t size);
 bool  _mi_os_unprotect(void* addr, size_t size);
 bool  _mi_os_purge(void* p, size_t size, mi_stats_t* stats);
 
-void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
-void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* stats);
+void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats);
 void  _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
 void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
 bool  _mi_os_use_large_page(size_t size, size_t alignment);
 size_t _mi_os_large_page_size(void);
 
 void  _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
-void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, bool* is_zero);
 void  _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
 
 // arena.c
diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index b900cc95..f07bb4bd 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Each OS/host needs to implement these primitives, see `src/prim`
 // for implementations on Windows, macOS, WASI, and Linux/Unix.
 //
-// note: on all primitive functions, we always get:
+// note: on all primitive functions, we always have result parameters != NULL, and:
 //       addr != NULL and page aligned
 //       size > 0 and page aligned
 //       return value is an error code: an int where 0 is success.
@@ -39,9 +39,10 @@ int _mi_prim_free(void* addr, size_t size );
 // The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
 // If `commit` is false, the virtual memory range only needs to be reserved (with no access)
 // which will later be committed explicitly using `_mi_prim_commit`.
+// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
 // pre: !commit => !allow_large
 //      try_alignment >= _mi_os_page_size() and a power of 2
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr);
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr);
 
 // Commit memory. Returns error code or 0 on success.
 // For example, on Linux this would make the memory PROT_READ|PROT_WRITE.
@@ -61,10 +62,10 @@ int _mi_prim_reset(void* addr, size_t size);
 int _mi_prim_protect(void* addr, size_t size, bool protect);
 
 // Allocate huge (1GiB) pages possibly associated with a NUMA node.
+// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
 // pre: size > 0 and a multiple of 1GiB.
-//      addr is either NULL or an address hint.
 //      numa_node is either negative (don't care), or a numa node number.
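+// A usage sketch (illustrative only; this hypothetical caller clears the
+// memory itself only when the primitive could not guarantee zeroing):
+//   bool is_zero = false; void* p = NULL;
+//   int err = _mi_prim_alloc_huge_os_pages(NULL, (size_t)1 << 30, -1, &is_zero, &p);
+//   if (err == 0 && p != NULL && !is_zero) { memset(p, 0, (size_t)1 << 30); }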
-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr);
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr);
 
 // Return the current NUMA node
 size_t _mi_prim_numa_node(void);
 
diff --git a/src/arena.c b/src/arena.c
index 14dd8b4d..5a3dfb91 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -322,9 +322,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
     errno = ENOMEM;
     return NULL;
   }
-  *is_zero = true;
+
   *memid = MI_MEMID_OS;
-  void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats);
+  void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, is_zero, tld->stats);
   if (p != NULL) { *is_pinned = *large; }
   return p;
 }
@@ -612,8 +612,9 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is
   const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
   const size_t bitmaps = (allow_decommit ? 4 : 2);
   const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
-  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
+  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, NULL, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
   if (arena == NULL) return false;
+  _mi_memzero(arena, asize);
 
   // already zero'd due to os_alloc
   // _mi_memzero(arena, asize);
@@ -654,9 +655,10 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc
   if (arena_id != NULL) *arena_id = _mi_arena_id_none();
   size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
   bool large = allow_large;
-  void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main);
+  bool is_zero;
+  void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &is_zero, &_mi_stats_main);
   if (start==NULL) return ENOMEM;
-  if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) {
+  if (!mi_manage_os_memory_ex(start, size, (large || commit), large, is_zero, -1, exclusive, arena_id)) {
    _mi_os_free_ex(start, size, commit, &_mi_stats_main);
     _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024));
     return ENOMEM;
@@ -718,14 +720,15 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m
   if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
   size_t hsize = 0;
   size_t pages_reserved = 0;
-  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
+  bool is_zero = false;
+  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &is_zero);
   if (p==NULL || pages_reserved==0) {
     _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
     return ENOMEM;
   }
   _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);
 
-  if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) {
+  if (!mi_manage_os_memory_ex(p, hsize, true, true, is_zero, numa_node, exclusive, arena_id)) {
     _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
     return ENOMEM;
   }
 
diff --git a/src/init.c b/src/init.c
index b105548e..177d3034 100644
--- a/src/init.c
+++ b/src/init.c
@@ -200,10 +200,10 @@ static mi_thread_data_t* mi_thread_data_alloc(void) {
     }
   }
   // if that fails, allocate directly from the OS
-  td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+  td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), NULL, &_mi_stats_main);
   if (td == NULL) {
     // if this fails, try once more. (issue #257)
-    td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+    td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), NULL, &_mi_stats_main);
     if (td == NULL) {
       // really out of memory
       _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
 
diff --git a/src/os.c b/src/os.c
index 78a7b5f7..e639c751 100644
--- a/src/os.c
+++ b/src/os.c
@@ -171,27 +171,20 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) {
 -------------------------------------------------------------- */
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) {
+static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
+  mi_assert_internal(is_zero != NULL);
+  mi_assert_internal(is_large != NULL);
   if (size == 0) return NULL;
   if (!commit) allow_large = false;
   if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning
+  *is_zero = false;
   void* p = NULL;
-  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p);
+  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
   if (err != 0) {
     _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
   }
-  /*
-  if (commit && allow_large) {
-    p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment);
-    if (p != NULL) {
-      *is_large = true;
-      return p;
-    }
-  }
-  */
-
   mi_stat_counter_increase(stats->mmap_calls, 1);
   if (p != NULL) {
     _mi_stat_increase(&stats->reserved, size);
@@ -203,16 +196,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
 
 // Primitive aligned allocation from the OS.
 // This function guarantees the allocated memory is aligned.
-static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) {
+static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
   mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(is_large != NULL);
+  mi_assert_internal(is_zero != NULL);
   if (!commit) allow_large = false;
   if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
   size = _mi_align_up(size, _mi_os_page_size());
 
   // try first with a hint (this will be aligned directly on Win 10+ or BSD)
-  void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats);
+  void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
   if (p == NULL) return NULL;
 
   // if not aligned, free it, overallocate, and unmap around it
@@ -224,7 +218,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
     if (mi_os_mem_config.must_free_whole) {  // win32 virtualAlloc cannot free parts of an allocated block
       // over-allocate uncommitted (virtual) memory
-      p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats);
+      p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
       if (p == NULL) return NULL;
 
       // set p to the aligned part in the full region
@@ -239,7 +233,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
     }
     else { // mmap can free inside an allocation
       // overallocate...
-      p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats);
+      p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
       if (p == NULL) return NULL;
 
       // and selectively unmap parts around the over-allocated area. (noop on sbrk)
       void* aligned_p = mi_align_up_ptr(p, alignment);
@@ -263,16 +257,19 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
   OS API: alloc and alloc_aligned
----------------------------------------------------------- */
 
-void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) {
+void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* tld_stats) {
   MI_UNUSED(tld_stats);
   mi_stats_t* stats = &_mi_stats_main;
   if (size == 0) return NULL;
   size = _mi_os_good_alloc_size(size);
   bool is_large = false;
-  return mi_os_mem_alloc(size, 0, true, false, &is_large, stats);
+  bool is_zerox = false;
+  void* p = mi_os_mem_alloc(size, 0, true, false, &is_large, &is_zerox, stats);
+  if (is_zero != NULL) { *is_zero = is_zerox; }
+  return p;
 }
 
-void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats)
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats)
 {
   MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
   MI_UNUSED(tld_stats);
@@ -284,7 +281,12 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar
     allow_large = *large;
     *large = false;
   }
-  return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ );
+  bool is_largex = false;
+  bool is_zerox = false;
+  void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &is_largex, &is_zerox, &_mi_stats_main /*tld->stats*/ );
+  if (large != NULL) { *large = is_largex; }
+  if (is_zero != NULL) { *is_zero = is_zerox; }
+  return p;
 }
 
 /* -----------------------------------------------------------
@@ -295,20 +297,20 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar
   to use the actual start of the memory region.
----------------------------------------------------------- */
 
-void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) {
+void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats) {
   mi_assert(offset <= MI_SEGMENT_SIZE);
   mi_assert(offset <= size);
   mi_assert((alignment % _mi_os_page_size()) == 0);
   if (offset > MI_SEGMENT_SIZE) return NULL;
   if (offset == 0) {
     // regular aligned allocation
-    return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats);
+    return _mi_os_alloc_aligned(size, alignment, commit, large, is_zero, tld_stats);
   }
   else {
     // overallocate to align at an offset
     const size_t extra = _mi_align_up(offset, alignment) - offset;
     const size_t oversize = size + extra;
-    void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats);
+    void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, is_zero, tld_stats);
     if (start == NULL) return NULL;
     void* p = (uint8_t*)start + extra;
     mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
@@ -521,7 +523,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
 #endif
 
 // Allocate MI_SEGMENT_SIZE aligned huge pages
-void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) {
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, bool* is_zero) {
   if (psize != NULL) *psize = 0;
   if (pages_reserved != NULL) *pages_reserved = 0;
   size_t size = 0;
@@ -533,11 +535,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
   // or to at least allocate as many as available on the system.
   mi_msecs_t start_t = _mi_clock_start();
   size_t page = 0;
+  bool all_zero = true;
   while (page < pages) {
     // allocate a page
+    bool is_zerox = false;
     void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
     void* p = NULL;
-    int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p);
+    int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zerox, &p);
+    if (!is_zerox) { all_zero = false; }
     if (err != 0) {
       _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
       break;
@@ -576,6 +581,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
   mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
   if (pages_reserved != NULL) { *pages_reserved = page; }
   if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
+  if (is_zero != NULL) { *is_zero = all_zero; }
   return (page == 0 ? NULL : start);
 }
 
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index f9aa3b7c..eec6ca6d 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -313,12 +313,13 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
 }
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
 
-  int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
+  *is_zero = true;
+  int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
   *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
   return (*addr != NULL ? 0 : errno);
 }
 
@@ -417,8 +418,9 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co
 }
 #endif
 
-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
   bool is_large = true;
+  *is_zero = true;
   *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
   if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
     unsigned long numa_mask = (1UL << numa_node);
@@ -436,8 +438,9 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, vo
 
 #else
 
-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
   MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
+  *is_zero = true;
   *addr = NULL;
   return ENOMEM;
 }
 
diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c
index 57d1c690..3f2659dd 100644
--- a/src/prim/wasi/prim.c
+++ b/src/prim/wasi/prim.c
@@ -114,9 +114,10 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) {
 }
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
   MI_UNUSED(allow_large); MI_UNUSED(commit);
   *is_large = false;
+  *is_zero = false;
   *addr = mi_prim_mem_grow(size, try_alignment);
   return (*addr != NULL ? 0 : ENOMEM);
 }
 
@@ -152,8 +153,9 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) {
 // Huge pages and NUMA nodes
 //---------------------------------------------
 
-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
   MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
+  *is_zero = true;
   *addr = NULL;
   return ENOSYS;
 }
 
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 467a42e9..514fe647 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -239,10 +239,11 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW
   return p;
 }
 
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
+  *is_zero = true;
   int flags = MEM_RESERVE;
   if (commit) { flags |= MEM_COMMIT; }
   *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
@@ -331,7 +332,8 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num
   return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE);
 }
 
-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
+  *is_zero = true;
   *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node);
   return (*addr != NULL ? 0 : (int)GetLastError());
 }
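
A minimal sketch of the resulting `is_zero` calling convention (illustrative
only; `my_os_calloc` is a hypothetical caller, not part of these patches).
The out-parameter lets callers avoid clearing memory that the OS already
guarantees to be zero-initialized:

    #include "mimalloc/internal.h"
    #include <string.h>

    // Allocate `size` bytes of zeroed OS memory; clear explicitly only when
    // the primitive could not guarantee zero-initialization (e.g. WASI sbrk).
    static void* my_os_calloc(size_t size) {
      bool is_zero = false;
      void* p = _mi_os_alloc(size, &is_zero, &_mi_stats_main);
      if (p != NULL && !is_zero) { memset(p, 0, size); }
      return p;
    }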