merge from dev-reset

daanx 2023-04-04 16:44:07 -07:00
commit 24034c997c
18 changed files with 316 additions and 765 deletions


@@ -51,6 +51,7 @@ set(mi_sources
 src/random.c
 src/segment.c
 src/segment-cache.c
+src/segment-map.c
 src/stats.c
 src/prim/prim.c)


@@ -257,7 +257,11 @@
 </ClCompile>
 <ClCompile Include="..\..\src\page.c" />
 <ClCompile Include="..\..\src\random.c" />
+<<<<<<< HEAD
 <ClCompile Include="..\..\src\segment-cache.c" />
+=======
+<ClCompile Include="..\..\src\segment-map.c" />
+>>>>>>> dev-reset
 <ClCompile Include="..\..\src\segment.c" />
 <ClCompile Include="..\..\src\stats.c" />
 </ItemGroup>


@@ -235,7 +235,11 @@
 </ClCompile>
 <ClCompile Include="..\..\src\page.c" />
 <ClCompile Include="..\..\src\random.c" />
+<<<<<<< HEAD
 <ClCompile Include="..\..\src\segment-cache.c" />
+=======
+<ClCompile Include="..\..\src\segment-map.c" />
+>>>>>>> dev-reset
 <ClCompile Include="..\..\src\segment.c" />
 <ClCompile Include="..\..\src\os.c" />
 <ClCompile Include="..\..\src\stats.c" />


@@ -284,7 +284,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node,
 mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
 mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
-#if MI_MALLOC_VERSION >= 200
+#if MI_MALLOC_VERSION >= 182
 // Create a heap that only allocates in the specified arena
 mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
 #endif

@@ -342,12 +342,11 @@ typedef enum mi_option_e {
 mi_option_max_errors,
 mi_option_max_warnings,
 mi_option_max_segment_reclaim,
-mi_option_deprecated_segment_decommit_delay,
-mi_option_purge_extend_delay,
 mi_option_destroy_on_exit,
 mi_option_arena_reserve,
 mi_option_arena_purge_delay,
 mi_option_allow_purge,
+mi_option_purge_extend_delay,
 _mi_option_last,
 // legacy options
 mi_option_eager_commit = mi_option_segment_eager_commit,
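
Editor's note: the `mi_heap_new_in_arena` export (now guarded by `MI_MALLOC_VERSION >= 182`) pairs with the arena reservation API declared just above it. A minimal usage sketch, not part of this commit; `mi_heap_malloc`, `mi_free`, and `mi_heap_delete` are existing mimalloc APIs that do not appear in this diff:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  mi_arena_id_t arena_id;
  // Reserve 64 MiB as an exclusive, committed arena (no large OS pages).
  if (mi_reserve_os_memory_ex(64 * 1024 * 1024, true /*commit*/, false /*allow_large*/,
                              true /*exclusive*/, &arena_id) != 0) {
    return 1;
  }
  // A heap that only allocates inside that arena.
  mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
  void* p = mi_heap_malloc(heap, 1024);
  printf("allocated %p from the reserved arena\n", p);
  mi_free(p);
  mi_heap_delete(heap);
  return 0;
}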


@@ -87,7 +87,7 @@ void _mi_thread_done(mi_heap_t* heap);
 // os.c
 void _mi_os_init(void); // called from process init
-void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
+void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats); // to allocate thread local data
 void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
 size_t _mi_os_page_size(void);
 size_t _mi_os_good_alloc_size(size_t size);

@@ -101,15 +101,15 @@ bool _mi_os_protect(void* addr, size_t size);
 bool _mi_os_unprotect(void* addr, size_t size);
 bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats);
-void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* stats);
-void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
+void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats);
 void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
 void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
 bool _mi_os_use_large_page(size_t size, size_t alignment);
 size_t _mi_os_large_page_size(void);
 void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
-void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, bool* is_zero);
 void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
 // arena.c

@@ -120,12 +120,15 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_o
 bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id);
 bool _mi_arena_is_os_allocated(size_t arena_memid);
 void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats);
+bool _mi_arena_contains(const void* p);
 // "segment-cache.c"
 void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
 bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
 void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
 void _mi_segment_cache_free_all(mi_os_tld_t* tld);
+// "segment-map.c"
 void _mi_segment_map_allocated_at(const mi_segment_t* segment);
 void _mi_segment_map_freed_at(const mi_segment_t* segment);

@@ -175,6 +178,7 @@ void _mi_heap_collect_abandon(mi_heap_t* heap);
 void _mi_heap_set_default_direct(mi_heap_t* heap);
 bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid);
 void _mi_heap_destroy_all(void);
+bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid);
 // "stats.c"
 void _mi_stats_done(mi_stats_t* stats);
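
Editor's note: several internal OS allocation entry points now report through a `bool* is_zero` out-parameter whether the returned memory is already zeroed, and callers may pass NULL when they do not care (as the init.c change further below shows). A self-contained sketch of the caller-side pattern, with a hypothetical `os_alloc` standing in for `_mi_os_alloc`:

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

// Hypothetical stand-in that mirrors the new contract: the allocator reports
// through `is_zero` whether the memory is known to be zero-initialized.
static void* os_alloc(size_t size, bool* is_zero) {
  if (is_zero != NULL) { *is_zero = false; }  // malloc gives no zeroing guarantee
  return malloc(size);
}

int main(void) {
  bool is_zero = false;
  void* p = os_alloc(4096, &is_zero);
  if (p != NULL && !is_zero) {
    memset(p, 0, 4096);  // only pay for clearing when the OS did not do it already
  }
  free(p);
  return 0;
}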


@@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Each OS/host needs to implement these primitives, see `src/prim`
 // for implementations on Window, macOS, WASI, and Linux/Unix.
 //
-// note: on all primitive functions, we always get:
+// note: on all primitive functions, we always have result parameters != NUL, and:
 // addr != NULL and page aligned
 // size > 0 and page aligned
 // return value is an error code an int where 0 is success.

@@ -39,19 +39,20 @@ int _mi_prim_free(void* addr, size_t size );
 // The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
 // If `commit` is false, the virtual memory range only needs to be reserved (with no access)
 // which will later be committed explicitly using `_mi_prim_commit`.
+// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
 // pre: !commit => !allow_large
 // try_alignment >= _mi_os_page_size() and a power of 2
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr);
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr);

 // Commit memory. Returns error code or 0 on success.
 // For example, on Linux this would make the memory PROT_READ|PROT_WRITE.
 int _mi_prim_commit(void* addr, size_t size);

-// Decommit memory. Returns error code or 0 on success. The `decommitted` result is true
+// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true
 // if the memory would need to be re-committed. For example, on Windows this is always true,
 // but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit.
-// pre: decommitted != NULL
-int _mi_prim_decommit(void* addr, size_t size, bool* decommitted);
+// pre: needs_recommit != NULL
+int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit);

 // Reset memory. The range keeps being accessible but the content might be reset.
 // Returns error code or 0 on success.

@@ -61,10 +62,10 @@ int _mi_prim_reset(void* addr, size_t size);
 int _mi_prim_protect(void* addr, size_t size, bool protect);

 // Allocate huge (1GiB) pages possibly associated with a NUMA node.
+// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
 // pre: size > 0 and a multiple of 1GiB.
-// addr is either NULL or an address hint.
 // numa_node is either negative (don't care), or a numa node number.
-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr);
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr);

 // Return the current NUMA node
 size_t _mi_prim_numa_node(void);
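
Editor's note: the decommit contract above (a `needs_recommit` result instead of `decommitted`) can be illustrated with a self-contained POSIX sketch; this is not mimalloc's code (its Unix implementation appears later in this diff), and `decommit` here is a hypothetical helper. MADV_DONTNEED keeps the range accessible, while the PROT_NONE fallback requires an explicit recommit:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

static int decommit(void* addr, size_t size, bool* needs_recommit) {
#if defined(MADV_DONTNEED)
  *needs_recommit = false;                    // range stays accessible, pages refault as zero
  return madvise(addr, size, MADV_DONTNEED);
#else
  *needs_recommit = true;                     // must be made accessible again before reuse
  return mprotect(addr, size, PROT_NONE);
#endif
}

int main(void) {
  const size_t size = 1 << 20;
  unsigned char* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return 1;
  memset(p, 0xFF, size);
  bool needs_recommit = true;
  if (decommit(p, size, &needs_recommit) != 0) return 1;
  if (needs_recommit) {
    mprotect(p, size, PROT_READ | PROT_WRITE);  // re-commit before touching the memory again
  }
  printf("first byte after decommit: %d\n", p[0]);  // typically reads 0 again
  munmap(p, size);
  return 0;
}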


@@ -128,6 +128,10 @@ static size_t mi_block_count_of_size(size_t size) {
 return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
 }
+
+static size_t mi_arena_block_size(size_t bcount) {
+return (bcount * MI_ARENA_BLOCK_SIZE);
+}

 /* -----------------------------------------------------------
 Thread safe allocation in an arena
 ----------------------------------------------------------- */

@@ -158,7 +162,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren
 if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;

 // claimed it!
-void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
+void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index));
 *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index);
 *large = arena->is_large;
 *is_pinned = (arena->is_large || !arena->allow_decommit);

@@ -183,7 +187,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren
 _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
 if (any_uncommitted) {
 bool commit_zero;
-_mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats);
+_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats);
 if (commit_zero) { *is_zero = true; }
 }
 }

@@ -207,7 +211,7 @@ static void* mi_arena_alloc_in(mi_arena_id_t arena_id, int numa_node, size_t siz
 const size_t bcount = mi_block_count_of_size(size);
 const size_t arena_index = mi_arena_id_index(arena_id);
 mi_assert_internal(arena_index < max_arena);
-mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE);
+mi_assert_internal(size <= mi_arena_block_size(bcount));
 if (arena_index >= max_arena) return NULL;

 mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);

@@ -228,7 +232,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
 const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
 const size_t bcount = mi_block_count_of_size(size);
 if mi_likely(max_arena == 0) return NULL;
-mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE);
+mi_assert_internal(size <= mi_arena_block_size(bcount));

 size_t arena_index = mi_arena_id_index(req_arena_id);
 if (arena_index < MI_MAX_ARENAS) {

@@ -301,9 +305,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
 {
 mi_arena_id_t arena_id = 0;
-bool arena_commit = _mi_os_has_overcommit(); // commit eagerly?
-if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
-else if (mi_option_get(mi_option_arena_eager_commit) == 0) { arena_commit = false; }
+bool arena_commit = false;
+if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
+else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }

 if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) {
 p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);

@@ -317,9 +322,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
 errno = ENOMEM;
 return NULL;
 }
-*is_zero = true;
 *memid = MI_MEMID_OS;
-void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats);
+void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, is_zero, tld->stats);
 if (p != NULL) { *is_pinned = *large; }
 return p;
 }

@@ -335,7 +340,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
 if (arena_index >= MI_MAX_ARENAS) return NULL;
 mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);
 if (arena == NULL) return NULL;
-if (size != NULL) *size = arena->block_count * MI_ARENA_BLOCK_SIZE;
+if (size != NULL) { *size = mi_arena_block_size(arena->block_count); }
 return arena->start;
 }

@@ -348,8 +353,8 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks,
 mi_assert_internal(arena->blocks_committed != NULL);
 mi_assert_internal(arena->blocks_purge != NULL);
 mi_assert_internal(arena->allow_decommit);
-const size_t size = blocks * MI_ARENA_BLOCK_SIZE;
-void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE);
+const size_t size = mi_arena_block_size(blocks);
+void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx));
 const bool decommitted = _mi_os_purge(p, size, stats);

 // clear the purged blocks
 _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx);

@@ -557,6 +562,19 @@ void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats)
 mi_arenas_try_purge(force_decommit, true, stats);
 }
+
+bool _mi_arena_contains(const void* p) {
+const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+for (size_t i = 0; i < max_arena; i++) {
+mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+if (arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
+return true;
+}
+}
+return false;
+}

 /* -----------------------------------------------------------
 Add an arena.
 ----------------------------------------------------------- */

@@ -594,8 +612,9 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is
 const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
 const size_t bitmaps = (allow_decommit ? 4 : 2);
 const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
-mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
+mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, NULL, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
 if (arena == NULL) return false;
+_mi_memzero(arena, asize);
-// already zero'd due to os_alloc
 // _mi_memzero(arena, asize);

@@ -636,9 +655,10 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc
 if (arena_id != NULL) *arena_id = _mi_arena_id_none();
 size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
 bool large = allow_large;
-void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main);
+bool is_zero;
+void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &is_zero, &_mi_stats_main);
 if (start==NULL) return ENOMEM;
-if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) {
+if (!mi_manage_os_memory_ex(start, size, (large || commit), large, is_zero, -1, exclusive, arena_id)) {
 _mi_os_free_ex(start, size, commit, &_mi_stats_main);
 _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024));
 return ENOMEM;

@@ -700,14 +720,15 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m
 if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
 size_t hsize = 0;
 size_t pages_reserved = 0;
-void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
+bool is_zero = false;
+void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &is_zero);
 if (p==NULL || pages_reserved==0) {
 _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
 return ENOMEM;
 }
 _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);
-if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) {
+if (!mi_manage_os_memory_ex(p, hsize, true, true, is_zero, numa_node, exclusive, arena_id)) {
 _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
 return ENOMEM;
 }
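
Editor's note: the huge-page path now threads the primitive's `is_zero` result into `mi_manage_os_memory_ex` instead of assuming zeroed memory. From the public side this is reached through the reserve functions in mimalloc.h; a small usage sketch, assuming the existing `mi_reserve_huge_os_pages_at(pages, numa_node, timeout_msecs)` wrapper and `mi_malloc`/`mi_free`, none of which are shown in this diff:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Try to reserve 4 x 1GiB huge pages on NUMA node 0, waiting at most 2 seconds.
  int err = mi_reserve_huge_os_pages_at(4, 0, 2000);
  if (err != 0) {
    fprintf(stderr, "huge page reservation failed (error %d)\n", err);
    return 1;
  }
  void* p = mi_malloc(16 * 1024 * 1024);  // large allocations can now come from the reservation
  mi_free(p);
  return 0;
}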


@@ -169,7 +169,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
 // collect regions on program-exit (or shared library unload)
 if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
-//_mi_mem_collect(&heap->tld->os);
 _mi_arena_collect(false,true /* force purge */,&heap->tld->stats);
 }
 }


@@ -226,10 +226,10 @@ static mi_thread_data_t* mi_thread_data_alloc(void) {
 }
 }
 // if that fails, allocate directly from the OS
-td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), NULL, &_mi_stats_main);
 if (td == NULL) {
 // if this fails, try once more. (issue #257)
-td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), NULL, &_mi_stats_main);
 if (td == NULL) {
 // really out of memory
 _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));


@@ -41,7 +41,7 @@ typedef struct mi_option_desc_s {
 mi_init_t init; // is it initialized yet? (from the environment)
 mi_option_t option; // for debugging: the option index should match the option
 const char* name; // option name without `mimalloc_` prefix
-const char* legacy_name; // potential legacy v1.x option name
+const char* legacy_name; // potential legacy option name
 } mi_option_desc_t;

 #define MI_OPTION(opt) mi_option_##opt, #opt, NULL

@@ -58,10 +58,10 @@ static mi_option_desc_t options[_mi_option_last] =
 { 0, UNINIT, MI_OPTION(show_stats) },
 { 0, UNINIT, MI_OPTION(verbose) },

-// Some of the following options are experimental and not all combinations are valid. Use with care.
-{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`)
+// the following options are experimental and not all combinations make sense.
+{ 1, UNINIT, MI_OPTION_LEGACY(segment_eager_commit,eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
 { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) },
-{ 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) },
+{ 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset)
 { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
 { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
 { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N

@@ -75,15 +75,13 @@ static mi_option_desc_t options[_mi_option_last] =
 #else
 { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
 #endif
-{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay, decommit_delay) }, // page decommit delay in milli-seconds
+{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay, reset_delay) }, // page decommit delay in milli-seconds
 { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
 { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
 { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
 { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
 { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
 { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try.
-{ 100, UNINIT, MI_OPTION(deprecated_segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments
-{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
 { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
 #if (MI_INTPTR_SIZE>4)
 { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time

@@ -91,7 +89,8 @@ static mi_option_desc_t options[_mi_option_last] =
 { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) },
 #endif
 { 100, UNINIT, MI_OPTION(arena_purge_delay) }, // reset/decommit delay in milli-seconds for arena allocation
-{ 1, UNINIT, MI_OPTION(allow_purge) } // allow decommit/reset to free (physical) memory back to the OS
+{ 1, UNINIT, MI_OPTION(allow_purge) }, // allow decommit/reset to free (physical) memory back to the OS
+{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
 };

 static void mi_option_init(mi_option_desc_t* desc);
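
Editor's note: the renamed options (purge_delay, purge_decommits, arena_eager_commit, ...) keep their old spellings available through the `legacy_name` field, so older environment variable names can still be picked up. A small sketch of querying and overriding these options at runtime; `mi_option_get`, `mi_option_is_enabled`, and `mi_option_set` are existing public mimalloc APIs not shown in this diff, and the MIMALLOC_* environment names are assumed from the `mimalloc_` prefix convention mentioned above:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Options can also come from the environment, e.g. MIMALLOC_PURGE_DELAY=250
  // (the legacy MIMALLOC_RESET_DELAY spelling would map to the same option).
  printf("purge delay: %ld ms\n", mi_option_get(mi_option_purge_delay));
  printf("purge decommits: %d\n", mi_option_is_enabled(mi_option_purge_decommits));
  mi_option_set(mi_option_purge_delay, 250);            // delay purging of freed memory by 250 ms
  mi_option_set(mi_option_arena_reserve, 256 * 1024);   // reserve arenas 256 MiB (value is in KiB) at a time
  return 0;
}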


@@ -160,27 +160,20 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) {
 -------------------------------------------------------------- */

 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) {
+static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
 mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
+mi_assert_internal(is_zero != NULL);
+mi_assert_internal(is_large != NULL);
 if (size == 0) return NULL;
 if (!commit) allow_large = false;
 if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning
+*is_zero = false;
 void* p = NULL;
-int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p);
+int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
 if (err != 0) {
 _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
 }
-/*
-if (commit && allow_large) {
-p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment);
-if (p != NULL) {
-*is_large = true;
-return p;
-}
-}
-*/
 mi_stat_counter_increase(stats->mmap_calls, 1);
 if (p != NULL) {
 _mi_stat_increase(&stats->reserved, size);

@@ -192,16 +185,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
 // Primitive aligned allocation from the OS.
 // This function guarantees the allocated memory is aligned.
-static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) {
+static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
 mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
 mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
 mi_assert_internal(is_large != NULL);
+mi_assert_internal(is_zero != NULL);
 if (!commit) allow_large = false;
 if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
 size = _mi_align_up(size, _mi_os_page_size());

 // try first with a hint (this will be aligned directly on Win 10+ or BSD)
-void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats);
+void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
 if (p == NULL) return NULL;

 // if not aligned, free it, overallocate, and unmap around it

@@ -213,7 +207,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
 if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block
 // over-allocate uncommitted (virtual) memory
-p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats);
+p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
 if (p == NULL) return NULL;

 // set p to the aligned part in the full region

@@ -228,7 +222,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
 }
 else { // mmap can free inside an allocation
 // overallocate...
-p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats);
+p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
 if (p == NULL) return NULL;

 // and selectively unmap parts around the over-allocated area. (noop on sbrk)
 void* aligned_p = mi_align_up_ptr(p, alignment);

@@ -252,16 +246,19 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
 OS API: alloc and alloc_aligned
 ----------------------------------------------------------- */

-void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) {
+void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* tld_stats) {
 MI_UNUSED(tld_stats);
 mi_stats_t* stats = &_mi_stats_main;
 if (size == 0) return NULL;
 size = _mi_os_good_alloc_size(size);
 bool is_large = false;
-return mi_os_mem_alloc(size, 0, true, false, &is_large, stats);
+bool is_zerox = false;
+void* p = mi_os_mem_alloc(size, 0, true, false, &is_large, &is_zerox, stats);
+if (is_zero != NULL) { *is_zero = is_zerox; }
+return p;
 }

-void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats)
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats)
 {
 MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
 MI_UNUSED(tld_stats);

@@ -273,7 +270,12 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar
 allow_large = *large;
 *large = false;
 }
-return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ );
+bool is_largex = false;
+bool is_zerox = false;
+void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &is_largex, &is_zerox, &_mi_stats_main /*tld->stats*/ );
+if (large != NULL) { *large = is_largex; }
+if (is_zero != NULL) { *is_zero = is_zerox; }
+return p;
 }

 /* -----------------------------------------------------------

@@ -284,20 +286,20 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar
 to use the actual start of the memory region.
 ----------------------------------------------------------- */

-void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) {
+void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, bool* is_zero, mi_stats_t* tld_stats) {
 mi_assert(offset <= MI_SEGMENT_SIZE);
 mi_assert(offset <= size);
 mi_assert((alignment % _mi_os_page_size()) == 0);
 if (offset > MI_SEGMENT_SIZE) return NULL;
 if (offset == 0) {
 // regular aligned allocation
-return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats);
+return _mi_os_alloc_aligned(size, alignment, commit, large, is_zero, tld_stats);
 }
 else {
 // overallocate to align at an offset
 const size_t extra = _mi_align_up(offset, alignment) - offset;
 const size_t oversize = size + extra;
-void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats);
+void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, is_zero, tld_stats);
 if (start == NULL) return NULL;
 void* p = (uint8_t*)start + extra;
 mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));

@@ -366,10 +368,10 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats
 return (err == 0);
 }

-static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_stats_t* tld_stats) {
+static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) {
 MI_UNUSED(tld_stats);
 mi_stats_t* stats = &_mi_stats_main;
-mi_assert_internal(decommitted!=NULL);
+mi_assert_internal(needs_recommit!=NULL);
 _mi_stat_decrease(&stats->committed, size);

 // page align

@@ -378,8 +380,8 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_sta
 if (csize == 0) return true;

 // decommit
-*decommitted = true;
-int err = _mi_prim_decommit(start,csize,decommitted);
+*needs_recommit = true;
+int err = _mi_prim_decommit(start,csize,needs_recommit);
 if (err != 0) {
 _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
 }

@@ -388,8 +390,8 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* decommitted, mi_sta
 }

 bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
-bool decommitted = true;
-return mi_os_decommit_ex(addr, size, &decommitted, tld_stats);
+bool needs_recommit;
+return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats);
 }

@@ -419,8 +421,8 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
 }

-// either resets or decommits memory, returns true if the memory was decommitted
-// (in the sense that it needs to be re-committed if the memory is re-used later on).
+// either resets or decommits memory, returns true if the memory needs
+// to be recommitted if it is to be re-used later on.
 bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats)
 {
 if (!mi_option_is_enabled(mi_option_allow_purge)) return false;

@@ -428,9 +430,9 @@ bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats)
 if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit?
 !_mi_preloading()) // don't decommit during preloading (unsafe)
 {
-bool decommitted;
-mi_os_decommit_ex(p, size, &decommitted, stats);
-return decommitted;
+bool needs_recommit;
+mi_os_decommit_ex(p, size, &needs_recommit, stats);
+return needs_recommit;
 }
 else {
 _mi_os_reset(p, size, stats);

@@ -512,7 +514,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
 #endif

 // Allocate MI_SEGMENT_SIZE aligned huge pages
-void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) {
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, bool* is_zero) {
 if (psize != NULL) *psize = 0;
 if (pages_reserved != NULL) *pages_reserved = 0;
 size_t size = 0;

@@ -524,11 +526,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
 // or to at least allocate as many as available on the system.
 mi_msecs_t start_t = _mi_clock_start();
 size_t page = 0;
+bool all_zero = true;
 while (page < pages) {
 // allocate a page
+bool is_zerox = false;
 void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
 void* p = NULL;
-int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p);
+int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zerox, &p);
+if (!is_zerox) { all_zero = false; }
 if (err != 0) {
 _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
 break;

@@ -567,6 +572,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
 mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
 if (pages_reserved != NULL) { *pages_reserved = page; }
 if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
+if (is_zero != NULL) { *is_zero = all_zero; }
 return (page == 0 ? NULL : start);
 }
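
Editor's note: the aligned allocation path above first tries with an alignment hint and otherwise over-allocates, then aligns inside the larger block and frees or leaves unmapped the slack around it. A standalone sketch of that arithmetic, where `align_up_ptr` is a simplified stand-in for mimalloc's internal `mi_align_up_ptr`:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Round a pointer up to the next multiple of `alignment` (a power of two).
static void* align_up_ptr(void* p, size_t alignment) {
  uintptr_t u = (uintptr_t)p;
  return (void*)((u + (alignment - 1)) & ~(uintptr_t)(alignment - 1));
}

int main(void) {
  const size_t size = 4096;
  const size_t alignment = 1 << 16;           // 64 KiB
  const size_t over_size = size + alignment;  // worst-case slack for any start address
  void* raw = malloc(over_size);
  if (raw == NULL) return 1;
  void* aligned = align_up_ptr(raw, alignment);
  printf("raw=%p aligned=%p offset=%zu\n", raw, aligned,
         (size_t)((char*)aligned - (char*)raw));
  // mi_os_mem_alloc_aligned instead unmaps (or leaves uncommitted) the head and tail around `aligned`.
  free(raw);
  return 0;
}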


@@ -313,11 +313,12 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
 }

 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
 mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
 mi_assert_internal(commit || !allow_large);
 mi_assert_internal(try_alignment > 0);
+*is_zero = true;
 int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
 *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
 return (*addr != NULL ? 0 : errno);

@@ -348,16 +349,16 @@ int _mi_prim_commit(void* start, size_t size) {
 return err;
 }

-int _mi_prim_decommit(void* start, size_t size, bool* decommitted) {
+int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
 int err = 0;
 #if defined(MADV_DONTNEED) && !MI_DEBUG && !MI_SECURE
 // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
 // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( )
-*decommitted = false;
+*needs_recommit = false;
 err = unix_madvise(start, size, MADV_DONTNEED);
 #else
 // decommit: just disable access (also used in debug and secure mode to trap on illegal access)
-*decommitted = true; // needs recommit to reuse the memory
+*needs_recommit = true; // needs recommit to reuse the memory
 err = mprotect(start, size, PROT_NONE);
 if (err != 0) { err = errno; }
 #endif

@@ -413,8 +414,9 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co
 }
 #endif

-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
 bool is_large = true;
+*is_zero = true;
 *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
 if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
 unsigned long numa_mask = (1UL << numa_node);

@@ -432,8 +434,9 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, vo
 #else

-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
 MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
+*is_zero = true;
 *addr = NULL;
 return ENOMEM;
 }


@@ -114,9 +114,10 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) {
 }

 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
 MI_UNUSED(allow_large); MI_UNUSED(commit);
 *is_large = false;
+*is_zero = false;
 *addr = mi_prim_mem_grow(size, try_alignment);
 return (*addr != NULL ? 0 : ENOMEM);
 }

@@ -131,9 +132,9 @@ int _mi_prim_commit(void* addr, size_t size) {
 return 0;
 }

-int _mi_prim_decommit(void* addr, size_t size, bool* decommitted) {
+int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
 MI_UNUSED(addr); MI_UNUSED(size);
-*decommitted = false;
+*needs_recommit = false;
 return 0;
 }

@@ -152,8 +153,9 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) {
 // Huge pages and NUMA nodes
 //---------------------------------------------

-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
 MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
+*is_zero = true;
 *addr = NULL;
 return ENOSYS;
 }


@@ -239,10 +239,11 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW
 return p;
 }

-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) {
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
 mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
 mi_assert_internal(commit || !allow_large);
 mi_assert_internal(try_alignment > 0);
+*is_zero = true;
 int flags = MEM_RESERVE;
 if (commit) { flags |= MEM_COMMIT; }
 *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);

@@ -262,9 +263,9 @@ int _mi_prim_commit(void* addr, size_t size) {
 return (p == addr ? 0 : (int)GetLastError());
 }

-int _mi_prim_decommit(void* addr, size_t size, bool* decommitted) {
+int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
 BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
-*decommitted = true; // for safetly, assume always decommitted even in the case of an error.
+*needs_recommit = true; // for safetly, assume always decommitted even in the case of an error.
 return (ok ? 0 : (int)GetLastError());
 }

@@ -331,7 +332,8 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num
 return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE);
 }

-int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
+*is_zero = true;
 *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node);
 return (*addr != NULL ? 0 : (int)GetLastError());
 }
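
Editor's note: on Windows the primitive can report `*is_zero = true` because freshly committed VirtualAlloc pages are zero-initialized, and `*needs_recommit = true` because MEM_DECOMMIT removes the commit entirely. A small Windows-only sketch of those two guarantees, not part of this commit:

#include <windows.h>
#include <stdio.h>

int main(void) {
  const SIZE_T size = 1 << 20;
  // Freshly committed pages are zero-initialized (hence *is_zero = true above).
  unsigned char* p = (unsigned char*)VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
  if (p == NULL) return 1;
  p[0] = 42;
  // Decommit discards contents; the range must be re-committed before reuse
  // (hence *needs_recommit = true above).
  VirtualFree(p, size, MEM_DECOMMIT);
  VirtualAlloc(p, size, MEM_COMMIT, PAGE_READWRITE);
  printf("after recommit: %d\n", p[0]);  // reads as 0 again
  VirtualFree(p, 0, MEM_RELEASE);
  return 0;
}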


@@ -1,502 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
and the segment and huge object allocation by mimalloc. There may be multiple
implementations of this (one could be the identity going directly to the OS,
another could be a simple cache etc), but the current one uses large "regions".
In contrast to the rest of mimalloc, the "regions" are shared between threads and
need to be accessed using atomic operations.
We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
an OS allocation/free is still (much) too expensive relative to the accesses
in that object :-( (`malloc-large` tests this). This means we need a cheaper
way to reuse memory.
3. This layer allows for NUMA aware allocation.
Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
simpler. Generally though that requires shrinking of huge pages, and may overuse
memory per thread. (and is not compatible with `sbrk`).
- Since the current regions are per-process, we need atomic operations to
claim blocks which may be contended
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
linearly. At what point will direct OS calls be faster? Is there a way to
do this better without adding too much complexity?
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include <string.h> // memset
#include "bitmap.h"
// os.c
bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
// Region info
typedef union mi_region_info_u {
size_t value;
struct {
bool valid; // initialized?
bool is_large:1; // allocated in fixed large/huge OS pages
bool is_pinned:1; // pinned memory cannot be decommitted
short numa_node; // the associated NUMA node (where -1 means no associated node)
} x;
} mi_region_info_t;
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
_Atomic(size_t) info; // mi_region_info_t.value
_Atomic(void*) start; // start of the memory area
mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block
mi_bitmap_field_t reset; // track if reset per block
_Atomic(size_t) arena_memid; // if allocated from a (huge page) arena
_Atomic(size_t) padding; // round to 8 fields (needs to be atomic for msvc, see issue #508)
} mem_region_t;
// The region map
static mem_region_t regions[MI_REGION_MAX];
// Allocated regions
static _Atomic(size_t) regions_count; // = 0;
/* ----------------------------------------------------------------------------
Utility functions
-----------------------------------------------------------------------------*/
// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
return _mi_divide_up(size, MI_SEGMENT_SIZE);
}
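
For instance, a 10MiB request needs ceil(10MiB / 4MiB) = 3 blocks. A tiny standalone check of that arithmetic, with the 4MiB constant spelled out locally as a stand-in for MI_SEGMENT_SIZE since this sketch does not include the mimalloc headers:

// Standalone check of the block arithmetic above: ceil(10MiB / 4MiB) == 3.
#include <assert.h>
#include <stddef.h>
int main(void) {
  const size_t segment_size = (size_t)4 * 1024 * 1024;      // stand-in for MI_SEGMENT_SIZE
  const size_t size         = (size_t)10 * 1024 * 1024;
  assert((size + segment_size - 1) / segment_size == 3);    // what _mi_divide_up computes
  return 0;
}
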
/*
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) {
if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
return _mi_align_up(size, _mi_os_large_page_size());
}
*/
// Return whether a pointer points into a region reserved by us.
mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
if (p==NULL) return false;
size_t count = mi_atomic_load_relaxed(&regions_count);
for (size_t i = 0; i < count; i++) {
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, &regions[i].start);
if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
}
return false;
}
static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start);
mi_assert_internal(start != NULL);
return (start + (bit_idx * MI_SEGMENT_SIZE));
}
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
size_t idx = region - regions;
mi_assert_internal(&regions[idx] == region);
return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}
static size_t mi_memid_create_from_arena(size_t arena_memid) {
return (arena_memid << 1) | 1;
}
static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
if ((id&1)==1) {
if (arena_memid != NULL) *arena_memid = (id>>1);
return true;
}
else {
size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
*bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
*region = &regions[idx];
return false;
}
}
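
The three helpers above encode where a block came from in a single size_t: the low bit tags arena allocations, and region allocations pack the region index and bit index into the remaining bits. A standalone sketch of the round trip follows; FIELD_BITS is a stand-in for MI_BITMAP_FIELD_BITS on a 64-bit build.

// Standalone illustration of the id encoding used above (not mimalloc API).
#include <assert.h>
#include <stddef.h>

#define FIELD_BITS 64   // stand-in for MI_BITMAP_FIELD_BITS on 64-bit

int main(void) {
  // encode: region 5, bit 12  ->  id = ((5*64 + 12) << 1) with a 0 tag bit
  size_t id = ((size_t)(5*FIELD_BITS + 12)) << 1;
  assert((id & 1) == 0);                       // low bit 0 => region allocation
  assert((id >> 1) / FIELD_BITS == 5);         // recover the region index
  assert((id >> 1) % FIELD_BITS == 12);        // recover the bit index

  // encode: arena memid 42  ->  id = (42 << 1) | 1
  size_t aid = ((size_t)42 << 1) | 1;
  assert((aid & 1) == 1);                      // low bit 1 => arena allocation
  assert((aid >> 1) == 42);
  return 0;
}
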
/* ----------------------------------------------------------------------------
Allocate a fresh region from the OS (or an arena)
-----------------------------------------------------------------------------*/
static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// not out of regions yet?
if (mi_atomic_load_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false;
// try to allocate a fresh region from the OS
bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
bool region_large = (commit && allow_large);
bool is_zero = false;
bool is_pinned = false;
size_t arena_memid = 0;
void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, &region_commit, &region_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld);
if (start == NULL) return false;
mi_assert_internal(!(region_large && !allow_large));
mi_assert_internal(!region_large || region_commit);
// claim a fresh slot
const size_t idx = mi_atomic_increment_acq_rel(&regions_count);
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement_acq_rel(&regions_count);
_mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats);
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB));
return false;
}
// allocated, initialize and claim the initial blocks
mem_region_t* r = &regions[idx];
r->arena_memid = arena_memid;
mi_atomic_store_release(&r->in_use, (size_t)0);
mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
mi_atomic_store_release(&r->reset, (size_t)0);
*bit_idx = 0;
_mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
mi_atomic_store_ptr_release(void,&r->start, start);
// and share it
mi_region_info_t info;
info.value = 0; // initialize the full union to zero
info.x.valid = true;
info.x.is_large = region_large;
info.x.is_pinned = is_pinned;
info.x.numa_node = (short)_mi_os_numa_node(tld);
mi_atomic_store_release(&r->info, info.value); // now make it available to others
*region = r;
return true;
}
/* ----------------------------------------------------------------------------
Try to claim blocks in suitable regions
-----------------------------------------------------------------------------*/
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
// initialized at all?
mi_region_info_t info;
info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info);
if (info.value==0) return false;
// numa correct
if (numa_node >= 0) { // use negative numa node to always succeed
int rnode = info.x.numa_node;
if (rnode >= 0 && rnode != numa_node) return false;
}
// check allow-large
if (!allow_large && info.x.is_large) return false;
return true;
}
static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// try all regions for a free slot
const size_t count = mi_atomic_load_relaxed(&regions_count); // monotonic, so ok to be relaxed
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
mem_region_t* r = &regions[idx];
// if this region suits our demand (numa node matches, large OS page matches)
if (mi_region_is_suitable(r, numa_node, allow_large)) {
// then try to atomically claim a segment(s) in this region
if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
tld->region_idx = idx; // remember the last found position
*region = r;
return true;
}
}
}
return false;
}
static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
mem_region_t* region;
mi_bitmap_index_t bit_idx;
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
// try to claim in existing regions
if (!mi_region_try_claim(numa_node, blocks, *large, &region, &bit_idx, tld)) {
// otherwise try to allocate a fresh region and claim in there
if (!mi_region_try_alloc_os(blocks, *commit, *large, &region, &bit_idx, tld)) {
// out of regions or memory
return NULL;
}
}
// ------------------------------------------------
// found a region and claimed `blocks` at `bit_idx`, initialize them now
mi_assert_internal(region != NULL);
mi_assert_internal(_mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
mi_region_info_t info;
info.value = mi_atomic_load_acquire(&region->info);
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&region->start);
mi_assert_internal(!(info.x.is_large && !*large));
mi_assert_internal(start != NULL);
*is_zero = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);
*large = info.x.is_large;
*is_pinned = info.x.is_pinned;
*memid = mi_memid_create(region, bit_idx);
void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
// commit
if (*commit) {
// ensure commit
bool any_uncommitted;
_mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted);
if (any_uncommitted) {
mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
bool commit_zero = false;
if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) {
// failed to commit! unclaim and return
_mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
return NULL;
}
if (commit_zero) *is_zero = true;
}
}
else {
// no need to commit, but check if already fully committed
*commit = _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
}
mi_assert_internal(!*commit || _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));
// unreset reset blocks
if (_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
// some blocks are still reset
mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
_mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
if (*commit || !mi_option_is_enabled(mi_option_purge_decommits)) { // only if needed
bool reset_zero = false;
_mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
if (reset_zero) *is_zero = true;
}
}
mi_assert_internal(!_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));
#if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN
if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
// and return the allocation
mi_assert_internal(p != NULL);
return p;
}
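
The commit path above claims the commit bits first and only calls into the OS when some bit in the range was still clear (`any_uncommitted`). Below is a standalone sketch of that pattern using a single atomic word; the names are illustrative, and the real code uses _mi_bitmap_claim for this.

// Atomically set the commit bits for a range and report whether any of them
// were still clear, so the OS commit call is made only when needed.
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic(uint64_t) commit_bits;

static bool claim_range(size_t idx, size_t n) {   // true if any bit was newly set
  uint64_t mask = ((n >= 64 ? ~UINT64_C(0) : ((UINT64_C(1) << n) - 1)) << idx);
  uint64_t prev = atomic_fetch_or_explicit(&commit_bits, mask, memory_order_acq_rel);
  return ((prev & mask) != mask);                 // some bit in the range was 0 before
}

int main(void) {
  printf("%d\n", claim_range(4, 3));   // 1: blocks 4..6 were uncommitted, commit them
  printf("%d\n", claim_range(5, 2));   // 0: already covered, skip the OS call
  return 0;
}
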
/* ----------------------------------------------------------------------------
Allocation
-----------------------------------------------------------------------------*/
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = 0;
*is_zero = false;
*is_pinned = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
if (size == 0) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
// allocate from regions if possible
void* p = NULL;
size_t arena_memid;
const size_t blocks = mi_region_block_count(size);
if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);
if (p == NULL) {
_mi_warning_message("unable to allocate from region: size %zu\n", size);
}
}
if (p == NULL) {
// and otherwise fall back to the OS
p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
}
if (p != NULL) {
mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0);
#if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN
if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed
#endif
}
return p;
}
/* ----------------------------------------------------------------------------
Free
-----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
mi_assert_internal(size > 0 && tld != NULL);
if (p==NULL) return;
if (size==0) return;
size = _mi_align_up(size, _mi_os_page_size());
size_t arena_memid = 0;
mi_bitmap_index_t bit_idx;
mem_region_t* region;
if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
// was a direct arena allocation, pass through
_mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats);
}
else {
// allocated in a region
mi_assert_internal(align_offset == 0);
mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
const size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
mi_region_info_t info;
info.value = mi_atomic_load_acquire(&region->info);
mi_assert_internal(info.value != 0);
void* blocks_start = mi_region_blocks_start(region, bit_idx);
mi_assert_internal(blocks_start == p); // not a pointer in our area?
mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
// committed?
if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
_mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL);
}
if (any_reset) {
// set the is_reset bits if any pages were reset
_mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL);
}
// reset the blocks to reduce the working set.
if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
&& (mi_option_is_enabled(mi_option_eager_commit) ||
mi_option_is_enabled(mi_option_purge_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
{
bool any_unreset;
_mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
if (any_unreset) {
_mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
_mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
}
}
// and unclaim
bool all_unclaimed = _mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed);
}
}
/* ----------------------------------------------------------------------------
collection
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_os_tld_t* tld) {
// free every region that has no segments in use.
size_t rcount = mi_atomic_load_relaxed(&regions_count);
for (size_t i = 0; i < rcount; i++) {
mem_region_t* region = &regions[i];
if (mi_atomic_load_relaxed(&region->info) != 0) {
// if no segments used, try to claim the whole region
size_t m = mi_atomic_load_relaxed(&region->in_use);
while (m == 0 && !mi_atomic_cas_weak_release(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
if (m == 0) {
// on success, free the whole region
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&regions[i].start);
size_t arena_memid = mi_atomic_load_relaxed(&regions[i].arena_memid);
size_t commit = mi_atomic_load_relaxed(&regions[i].commit);
memset((void*)&regions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning
// and release the whole region
mi_atomic_store_release(&region->info, (size_t)0);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_abandoned_await_readers(); // ensure no pending reads
_mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats);
}
}
}
}
}
/* ----------------------------------------------------------------------------
Other
-----------------------------------------------------------------------------*/
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
if (mi_option_is_enabled(mi_option_purge_decommits)) {
return _mi_os_decommit(p, size, tld->stats);
}
else {
return _mi_os_reset(p, size, tld->stats);
}
}
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
if (mi_option_is_enabled(mi_option_purge_decommits)) {
return _mi_os_commit(p, size, is_zero, tld->stats);
}
else {
// return _mi_os_unreset(p, size, is_zero, tld->stats);
return true;
}
}
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
return _mi_os_commit(p, size, is_zero, tld->stats);
}
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
return _mi_os_decommit(p, size, tld->stats);
}
bool _mi_mem_protect(void* p, size_t size) {
return _mi_os_protect(p, size);
}
bool _mi_mem_unprotect(void* p, size_t size) {
return _mi_os_unprotect(p, size);
}

View file

@ -275,149 +275,3 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me
return true; return true;
#endif #endif
} }
/* -----------------------------------------------------------
The following functions are to reliably find the segment or
block that encompasses any pointer p (or NULL if it is not
in any of our segments).
We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB)
set to 1 if it contains the segment meta data.
----------------------------------------------------------- */
#if (MI_INTPTR_SIZE==8)
#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB
#else
#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb
#endif
#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
*bitidx = 0;
return MI_SEGMENT_MAP_WSIZE;
}
else {
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
*bitidx = segindex % MI_INTPTR_BITS;
const size_t mapindex = segindex / MI_INTPTR_BITS;
mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
return mapindex;
}
}
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
if (index==MI_SEGMENT_MAP_WSIZE) return;
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
uintptr_t newmask;
do {
newmask = (mask | ((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
if (index == MI_SEGMENT_MAP_WSIZE) return;
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
uintptr_t newmask;
do {
newmask = (mask & ~((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p);
mi_assert_internal(segment != NULL);
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
// fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
return segment; // yes, allocated by us
}
if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
// TODO: maintain max/min allocated range for more efficient rejection of invalid pointers?
// search downwards for the first segment in case it is an interior pointer
// could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
// valid huge objects
// note: we could maintain a lowest index to speed up the path for invalid pointers?
size_t lobitidx;
size_t loindex;
uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
if (lobits != 0) {
loindex = index;
lobitidx = mi_bsr(lobits); // lobits != 0
}
else if (index == 0) {
return NULL;
}
else {
mi_assert_internal(index > 0);
uintptr_t lomask = mask;
loindex = index;
do {
loindex--;
lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
} while (lomask != 0 && loindex > 0);
if (lomask == 0) return NULL;
lobitidx = mi_bsr(lomask); // lomask != 0
}
mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
// take difference as the addresses could be larger than the MAX_ADDRESS space.
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
segment = (mi_segment_t*)((uint8_t*)segment - diff);
if (segment == NULL) return NULL;
mi_assert_internal((void*)segment < p);
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(cookie_ok);
if mi_unlikely(!cookie_ok) return NULL;
if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
return segment;
}
// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
return (_mi_segment_of(p) != NULL);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return mi_is_valid_pointer(p);
}
/*
// Return the full segment range belonging to a pointer
static void* mi_segment_range_of(const void* p, size_t* size) {
mi_segment_t* segment = _mi_segment_of(p);
if (segment == NULL) {
if (size != NULL) *size = 0;
return NULL;
}
else {
if (size != NULL) *size = segment->segment_size;
return segment;
}
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
mi_reset_delayed(tld);
mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
return page;
}
*/

src/segment-map.c (new file, 153 lines)
View file

@ -0,0 +1,153 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* -----------------------------------------------------------
The following functions are to reliably find the segment or
block that encompasses any pointer p (or NULL if it is not
in any of our segments).
We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB)
set to 1 if it contains the segment meta data.
----------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#if (MI_INTPTR_SIZE==8)
#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas)
#else
#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb
#endif
#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments
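
As a sanity check on the sizing in the comments above (assuming MI_SEGMENT_SIZE is 64MiB and 8-byte words, as the comments state), 40TiB of address space needs 655360 bits, i.e. 80KiB of map, which matches the quoted 2KiB per TiB. A standalone check of that arithmetic:

// Standalone arithmetic check of the segment-map sizing; the 64MiB segment
// size and 8-byte word size are assumptions taken from the comments above.
#include <assert.h>
#include <stdint.h>
int main(void) {
  const uint64_t max_address  = (uint64_t)40 << 40;          // 40 TiB
  const uint64_t segment_size = (uint64_t)64 << 20;          // 64 MiB
  const uint64_t map_bits  = max_address / segment_size;     // 655360 bits
  const uint64_t map_size  = map_bits / 8;                   // 81920 bytes (80 KiB)
  const uint64_t map_wsize = map_size / 8;                   // 10240 words
  assert(map_bits == 655360 && map_size == 80*1024 && map_wsize == 10240);
  assert(map_size / 40 == 2*1024);                           // ~2KiB per TiB covered
  return 0;
}
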
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
*bitidx = 0;
return MI_SEGMENT_MAP_WSIZE;
}
else {
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
*bitidx = segindex % MI_INTPTR_BITS;
const size_t mapindex = segindex / MI_INTPTR_BITS;
mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
return mapindex;
}
}
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
if (index==MI_SEGMENT_MAP_WSIZE) return;
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
uintptr_t newmask;
do {
newmask = (mask | ((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
if (index == MI_SEGMENT_MAP_WSIZE) return;
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
uintptr_t newmask;
do {
newmask = (mask & ~((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p);
mi_assert_internal(segment != NULL);
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
// fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
return segment; // yes, allocated by us
}
if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
// TODO: maintain max/min allocated range for more efficient rejection of invalid pointers?
// search downwards for the first segment in case it is an interior pointer
// could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
// valid huge objects
// note: we could maintain a lowest index to speed up the path for invalid pointers?
size_t lobitidx;
size_t loindex;
uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
if (lobits != 0) {
loindex = index;
lobitidx = mi_bsr(lobits); // lobits != 0
}
else if (index == 0) {
return NULL;
}
else {
mi_assert_internal(index > 0);
uintptr_t lomask = mask;
loindex = index;
do {
loindex--;
lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
} while (lomask != 0 && loindex > 0);
if (lomask == 0) return NULL;
lobitidx = mi_bsr(lomask); // lomask != 0
}
mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
// take difference as the addresses could be larger than the MAX_ADDRESS space.
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
segment = (mi_segment_t*)((uint8_t*)segment - diff);
if (segment == NULL) return NULL;
mi_assert_internal((void*)segment < p);
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(cookie_ok);
if mi_unlikely(!cookie_ok) return NULL;
if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
return segment;
}
// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
return (_mi_segment_of(p) != NULL);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return mi_is_valid_pointer(p);
}
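
A minimal usage example for the exported mi_is_in_heap_region (part of the public mimalloc API, so this builds when linked against mimalloc): a pointer returned by mi_malloc resolves to a mapped segment, while a stack address does not.

// Build with: cc example.c -lmimalloc
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  int on_stack = 0;
  void* p = mi_malloc(100);
  printf("heap ptr in region: %d\n",  mi_is_in_heap_region(p));          // expect 1
  printf("stack ptr in region: %d\n", mi_is_in_heap_region(&on_stack));  // expect 0
  mi_free(p);
  return 0;
}
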
/*
// Return the full segment range belonging to a pointer
static void* mi_segment_range_of(const void* p, size_t* size) {
mi_segment_t* segment = _mi_segment_of(p);
if (segment == NULL) {
if (size != NULL) *size = 0;
return NULL;
}
else {
if (size != NULL) *size = segment->segment_size;
return segment;
}
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
mi_reset_delayed(tld);
mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
return page;
}
*/

View file

@ -33,6 +33,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "random.c" #include "random.c"
#include "segment.c" #include "segment.c"
#include "segment-cache.c" #include "segment-cache.c"
#include "segment-map.c"
#include "stats.c" #include "stats.c"
#include "prim/prim.c" #include "prim/prim.c"
#if MI_OSX_ZONE #if MI_OSX_ZONE