From d36d04b4a6e5ada99fa36447332e5d7d3b1d33be Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 15:35:10 -0700 Subject: [PATCH 001/104] add arena for huge page management --- ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj | 1 + include/mimalloc-internal.h | 1 + src/arena.c | 369 +++++++++++++++++++++++++++ src/memory.c | 80 ++++-- src/os.c | 4 +- 6 files changed, 435 insertions(+), 21 deletions(-) create mode 100644 src/arena.c diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 96a8924f..09fd37fb 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 28e96d71..1fabff5e 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ccf12a06..2b881ac9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -57,6 +57,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); + // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); diff --git a/src/arena.c b/src/arena.c new file mode 100644 index 00000000..5f33965a --- /dev/null +++ b/src/arena.c @@ -0,0 +1,369 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset + +// os.c +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 16MiB +#define MI_MAX_ARENAS (64) + +// Block info: bit 0 contains the `in_use` bit, the upper bits the +// size in count of arena blocks. +typedef uintptr_t mi_block_info_t; + +// A memory arena descriptor +typedef struct mi_arena_s { + uint8_t* start; // the start of the memory area + size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + bool is_zero_init; // is the arena zero initialized? 
+ bool is_large; // large OS page allocated + _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks + _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's +} mi_arena_t; + + +// The available arenas +static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; +static _Atomic(uintptr_t) mi_arena_count; // = 0 + + +/* ----------------------------------------------------------- + Arena allocations get a memory id where the lower 8 bits are + the arena index +1, and the upper bits the block index. +----------------------------------------------------------- */ + +// Use `0` as a special id for direct OS allocated memory. +#define MI_MEMID_OS 0 + +static size_t mi_memid_create(size_t arena_index, size_t block_index) { + mi_assert_internal(arena_index < 0xFE); + return ((block_index << 8) | ((arena_index+1) & 0xFF)); +} + +static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { + mi_assert_internal(memid != MI_MEMID_OS); + *arena_index = (memid & 0xFF) - 1; + *block_index = (memid >> 8); +} + +/* ----------------------------------------------------------- + Block info +----------------------------------------------------------- */ + +static bool mi_block_is_in_use(mi_block_info_t info) { + return ((info&1) != 0); +} + +static size_t mi_block_count(mi_block_info_t info) { + return (info>>1); +} + +static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { + return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0)); +} + + +/* ----------------------------------------------------------- + Thread safe allocation in an arena +----------------------------------------------------------- */ + +static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + // Scan linearly through all block info's + // Skipping used ranges, coalescing free ranges on demand. + mi_assert_internal(needed_bcount > 0); + mi_assert_internal(start_idx <= arena->block_count); + mi_assert_internal(end_idx <= arena->block_count); + _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; + _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; + while (block < end) { + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + size_t bcount = mi_block_count(binfo); + if (mi_block_is_in_use(binfo)) { + // in-use, skip ahead + mi_assert_internal(bcount > 0); + block += bcount; + } + else { + // free blocks + if (bcount==0) { + // optimization: + // use 0 initialized blocks at the end, to use single atomic operation + // initially to reduce contention (as we don't need to split) + if (block + needed_bcount > end) { + return NULL; // does not fit + } + else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. 
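          // (a weak CAS can fail spuriously as well as under contention; either way
          //  the `continue` below goes back to the top of the scan loop, which simply
          //  re-reads the block info, so nothing needs to be undone before retrying)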
+ continue; + } + else { + // we got it: return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = arena->is_zero_init; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + + mi_assert_internal(bcount>0); + if (needed_bcount > bcount) { +#if 0 // MI_NO_ARENA_COALESCE + block += bcount; // too small, skip to the next range + continue; +#else + // too small, try to coalesce + _Atomic(mi_block_info_t)* block_next = block + bcount; + if (block_next >= end) { + return NULL; // does not fit + } + mi_block_info_t binfo_next = mi_atomic_read(block_next); + size_t bcount_next = mi_block_count(binfo_next); + if (mi_block_is_in_use(binfo_next)) { + // next block is in use, cannot coalesce + block += (bcount + bcount_next); // skip ahea over both blocks + } + else { + // next block is free, try to coalesce + // first set the next one to being used to prevent dangling ranges + if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { + // someone else got in before us.. try again + continue; + } + else { + if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance + // someone claimed/coalesced the block in the meantime + // first free the next block again.. + bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong + mi_assert(ok); UNUSED(ok); + // and try again + continue; + } + else { + // coalesced! try again + // todo: we could optimize here to immediately claim the block if the + // coalesced size is a fit instead of retrying. Keep it simple for now. + continue; + } + } + } +#endif + } + else { // needed_bcount <= bcount + mi_assert_internal(needed_bcount <= bcount); + // it fits, claim the whole block + if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. + continue; + } + else { + // got it, now split off the needed part + if (needed_bcount < bcount) { + mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); + mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); + } + // return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = false; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + } + } + // no success + return NULL; +} + +// Try to reduce search time by starting from bottom and wrap around. 
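Both the scan above and the wrapper below operate on packed `mi_block_info_t` words (bit 0 = in-use, upper bits = run length). A minimal standalone sketch of that packing, not part of the patch -- the `block_info_*` names are local to the sketch and mirror `mi_block_info_create`, `mi_block_count` and `mi_block_is_in_use`:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uintptr_t block_info_t;   // bit 0: in-use flag, upper bits: run length in blocks

static block_info_t block_info_create(size_t bcount, bool in_use) {
  return (((block_info_t)bcount << 1) | (in_use ? 1 : 0));
}
static size_t block_count(block_info_t info)  { return (size_t)(info >> 1); }
static bool   block_in_use(block_info_t info) { return (info & 1) != 0; }

int main(void) {
  // claim 2 blocks out of a free run of 5: the head becomes `in-use, count 2`
  // and the tail is written back as a free run of 3 (the same split that
  // mi_arena_allocx performs above with two atomic writes)
  block_info_t run       = block_info_create(5, false);
  block_info_t claimed   = block_info_create(2, true);
  block_info_t remainder = block_info_create(block_count(run) - 2, false);
  assert(block_in_use(claimed)    && block_count(claimed)   == 2);
  assert(!block_in_use(remainder) && block_count(remainder) == 3);
  return 0;
}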
+static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); + void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); + if (p == NULL && bottom > 0) { + // try again from the start + p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); + } + if (p != NULL) { + mi_atomic_write(&arena->block_bottom, *block_index); + } + return p; +} + +/* ----------------------------------------------------------- + Arena Allocation +----------------------------------------------------------- */ + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = MI_MEMID_OS; + *is_zero = false; + bool default_large = false; + if (large==NULL) large = &default_large; // ensure `large != NULL` + + // try to allocate in an arena if the alignment is small enough + // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. + if (alignment <= MI_SEGMENT_ALIGN && + size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 12MiB (not more than 25% waste) + !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! <16MiB - 24MiB> + ) + { + size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); + size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + if (arena==NULL) break; + if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); + #if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + #endif + *memid = mi_memid_create(i, block_index); + *commit = true; // TODO: support commit on demand? 
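        // (the arena memory -- huge OS pages here -- was committed up-front, hence the
        //  unconditional `*commit = true`; `*memid` records the arena and block index so
        //  `_mi_arena_free` can locate the block later, and `*is_zero` was already filled
        //  in by `mi_arena_alloc`)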
+ *large = arena->is_large; + mi_assert_internal((uintptr_t)p % alignment == 0); + return p; + } + } + } + } + + // fall back to the OS + *is_zero = true; + *memid = MI_MEMID_OS; + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); +} + +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); +} + +/* ----------------------------------------------------------- + Arena free +----------------------------------------------------------- */ + +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); + if (p==NULL) return; + if (size==0) return; + if (memid == MI_MEMID_OS) { + // was a direct OS allocation, pass through + _mi_os_free(p, size, stats); + } + else { + // allocated in an arena + size_t arena_idx; + size_t block_idx; + mi_memid_indices(memid, &arena_idx, &block_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); + mi_assert_internal(arena != NULL); + if (arena == NULL) { + _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + mi_assert_internal(arena->block_count > block_idx); + if (arena->block_count <= block_idx) { + _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + if (!mi_block_is_in_use(binfo)) { + _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); + return; + }; + bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); + mi_assert_internal(ok); + if (!ok) { + _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); + } + if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { + mi_atomic_write(&arena->block_bottom, block_idx); + } + } +} + +/* ----------------------------------------------------------- + Add an arena. +----------------------------------------------------------- */ + +static bool mi_arena_add(mi_arena_t* arena) { + mi_assert_internal(arena != NULL); + mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); + mi_assert_internal(arena->block_count > 0); + mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); + + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); + if (i >= MI_MAX_ARENAS) { + mi_atomic_subu(&mi_arena_count, 1); + return false; + } + mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena); + return true; +} + + +/* ----------------------------------------------------------- + Reserve a huge page arena. + TODO: improve OS api to just reserve and claim a huge + page area at once, (and return the total size). 
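  As an illustration only (example values, not taken from the patch), a program
  would typically call the entry point defined below once at startup, before any
  allocation:

      size_t reserved = 0;
      int err = mi_reserve_huge_os_pages(4, 10.0, &reserved);  // try 4 x 1GiB, ~10s time budget
      // err is 0 on success (ENOMEM/ETIMEDOUT otherwise); `reserved` reports
      // how many 1GiB pages were actually obtained

  The same reservation can also be triggered through the
  `mi_option_reserve_huge_os_pages` option handled in `mi_process_load`
  (see the `src/init.c` hunk further down).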
+----------------------------------------------------------- */ + +#include + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + size_t pages_reserved_default = 0; + if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; + int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved); + if (*pages_reserved==0) return err; + size_t hsize = (*pages_reserved) * GiB; + void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN); + mi_assert_internal(p != NULL); + if (p == NULL) return ENOMEM; + size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; + size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats); + if (arena == NULL) return ENOMEM; + arena->block_count = bcount; + arena->start = (uint8_t*)p; + arena->block_bottom = 0; + arena->is_large = true; + arena->is_zero_init = true; + memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false)); + mi_arena_add(arena); + return 0; +} diff --git a/src/memory.c b/src/memory.c index dd03cf95..9ab7c850 100644 --- a/src/memory.c +++ b/src/memory.c @@ -50,6 +50,12 @@ void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* sta void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); bool _mi_os_is_huge_reserved(void* p); +// arena.c +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); + + // Constants #if (MI_INTPTR_SIZE==8) #define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map @@ -87,6 +93,7 @@ typedef struct mem_region_s { volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd + size_t arena_memid; } mem_region_t; @@ -131,6 +138,30 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } +static size_t mi_memid_create(size_t idx, size_t bitidx) { + return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1; +} + +static size_t mi_memid_create_from_arena(size_t arena_memid) { + return (arena_memid << 1) | 1; +} + +static bool mi_memid_is_arena(size_t id) { + return ((id&1)==1); +} + +static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) { + if (mi_memid_is_arena(id)) { + *arena_memid = (id>>1); + return true; + } + else { + *idx = ((id>>1) / MI_REGION_MAP_BITS); + *bitidx = ((id>>1) % MI_REGION_MAP_BITS); + return false; + } +} + /* ---------------------------------------------------------------------------- Commit from a region -----------------------------------------------------------------------------*/ @@ -153,6 +184,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit { bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); bool region_large = *allow_large; + size_t arena_memid = 0; + void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, is_zero, &arena_memid, tld); + /* void* start = NULL; if (region_large) { start = 
_mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); @@ -161,6 +195,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (start == NULL) { start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); } + */ mi_assert_internal(!(region_large && !*allow_large)); if (start == NULL) { @@ -176,6 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit info = mi_region_info_create(start,region_large,region_commit); if (mi_atomic_cas_strong(®ion->info, info, 0)) { // update the region count + region->arena_memid = arena_memid; mi_atomic_increment(®ions_count); } else { @@ -183,6 +219,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // we assign it to a later slot instead (up to 4 tries). for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { + regions[idx+i].arena_memid = arena_memid; mi_atomic_increment(®ions_count); start = NULL; break; @@ -190,7 +227,8 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit } if (start != NULL) { // free it if we didn't succeed to save it to some other region - _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); } // and continue with the memory at our index info = mi_atomic_read(®ion->info); @@ -229,7 +267,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit mi_assert_internal(blocks_start != NULL); *allow_large = region_is_large; *p = blocks_start; - *id = (idx*MI_REGION_MAP_BITS) + bitidx; + *id = mi_memid_create(idx, bitidx); return true; } @@ -269,7 +307,7 @@ static inline size_t mi_bsr(uintptr_t x) { // Allocate `blocks` in a `region` at `idx` of a given `size`. // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. +// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) @@ -366,15 +404,17 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l { mi_assert_internal(id != NULL && tld != NULL); mi_assert_internal(size > 0); - *id = SIZE_MAX; + *id = 0; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` - // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) + // use direct OS allocation for huge blocks or alignment if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - *is_zero = true; - return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size + size_t arena_memid = 0; + void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld); // round up size + *id = mi_memid_create_from_arena(arena_memid); + return p; } // always round size to OS page size multiple (so commit/decommit go over the entire range) @@ -405,9 +445,10 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l if (p == NULL) { // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - *is_zero = true; - p = _mi_os_alloc_aligned(size, alignment, commit, large, tld); + _mi_warning_message("unable to allocate from region: size %zu\n", size); + size_t arena_memid = 0; + p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + *id = mi_memid_create_from_arena(arena_memid); } else { tld->region_idx = idx; // next start of search? currently not used as we use first-fit @@ -428,18 +469,19 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; - if (id == SIZE_MAX) { - // was a direct OS allocation, pass through - _mi_os_free(p, size, stats); + size_t arena_memid = 0; + size_t idx = 0; + size_t bitidx = 0; + if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { + // was a direct arena allocation, pass through + _mi_arena_free(p, size, arena_memid, stats); } else { // allocated in a region mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - size_t idx = (id / MI_REGION_MAP_BITS); - size_t bitidx = (id % MI_REGION_MAP_BITS); + size = _mi_align_up(size, _mi_os_page_size()); size_t blocks = mi_region_block_count(size); size_t mask = mi_region_block_mask(blocks, bitidx); mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? 
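The tag bit decoded by `_mi_mem_free` above can be exercised on its own. A minimal standalone sketch, not part of the patch -- the `memid_*` names are local and mirror `mi_memid_create`, `mi_memid_create_from_arena` and `mi_memid_indices`, with `REGION_MAP_BITS` standing in for `MI_REGION_MAP_BITS` (one bit per block in a region's map word):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define REGION_MAP_BITS  (8*sizeof(uintptr_t))   // stand-in for MI_REGION_MAP_BITS

// region-backed id: ((idx*REGION_MAP_BITS)+bitidx) << 1, low bit 0
static size_t memid_create(size_t idx, size_t bitidx) {
  return ((idx*REGION_MAP_BITS) + bitidx) << 1;
}
// arena-backed id: the arena memid shifted up, low bit 1
static size_t memid_create_from_arena(size_t arena_memid) {
  return (arena_memid << 1) | 1;
}
// decode; returns true when the id refers to an arena allocation
static bool memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) {
  if ((id & 1) == 1) { *arena_memid = (id >> 1); return true; }
  *idx    = (id >> 1) / REGION_MAP_BITS;
  *bitidx = (id >> 1) % REGION_MAP_BITS;
  return false;
}

int main(void) {
  size_t idx = 0, bitidx = 0, am = 0;
  assert(!memid_indices(memid_create(3, 17), &idx, &bitidx, &am) && idx == 3 && bitidx == 17);
  assert( memid_indices(memid_create_from_arena(42), &idx, &bitidx, &am) && am == 42);
  return 0;
}

The low bit is what lets `_mi_mem_free` route an id either back to `_mi_arena_free` or into the region map, as in the hunk above.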
@@ -503,11 +545,11 @@ void _mi_mem_collect(mi_stats_t* stats) { m = mi_atomic_read_relaxed(®ion->map); } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); if (m == 0) { - // on success, free the whole region (unless it was huge reserved) + // on success, free the whole region bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); - if (start != NULL && !_mi_os_is_huge_reserved(start)) { - _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats); + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); } // and release mi_atomic_write(®ion->info,0); diff --git a/src/os.c b/src/os.c index 8f5afc5b..85cd1a83 100644 --- a/src/os.c +++ b/src/os.c @@ -869,13 +869,13 @@ static void mi_os_free_huge_reserved() { */ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { UNUSED(pages); UNUSED(max_secs); if (pages_reserved != NULL) *pages_reserved = 0; return ENOMEM; } #else -int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept { if (pages_reserved != NULL) *pages_reserved = 0; if (max_secs==0) return ETIMEDOUT; // timeout From aaf01620f4e878d48a4d2815bd0d894f28a5f093 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 19:39:49 -0700 Subject: [PATCH 002/104] improve allocation of the huge OS page arena --- src/arena.c | 39 ++++++++++++------- src/os.c | 110 +++++++++++++++++----------------------------------- 2 files changed, 60 insertions(+), 89 deletions(-) diff --git a/src/arena.c b/src/arena.c index 5f33965a..469755f2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -6,7 +6,16 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- +"Arenas" are fixed area's of OS memory from which we can allocate +large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to +allocate in one arena consisting of huge OS pages -- otherwise it +delegates to direct allocation from the OS. +In the future, we can expose an API to manually add more arenas which +is sometimes needed for embedded devices or shared memory for example. + +The arena allocation needs to be thread safe and we use a lock-free scan +with on-demand coalescing. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -16,8 +25,8 @@ terms of the MIT license. 
A copy of the license can be found in the file // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; +int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); /* ----------------------------------------------------------- Arena allocation @@ -338,25 +347,27 @@ static bool mi_arena_add(mi_arena_t* arena) { /* ----------------------------------------------------------- Reserve a huge page arena. - TODO: improve OS api to just reserve and claim a huge - page area at once, (and return the total size). ----------------------------------------------------------- */ - -#include +#include // ENOMEM int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { size_t pages_reserved_default = 0; if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; - int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved); - if (*pages_reserved==0) return err; - size_t hsize = (*pages_reserved) * GiB; - void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN); - mi_assert_internal(p != NULL); - if (p == NULL) return ENOMEM; + size_t hsize = 0; + void* p = NULL; + int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize); + _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved); + if (p==NULL) return err; + // err might be != 0 but that is fine, we just got less pages. + mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages); size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats); - if (arena == NULL) return ENOMEM; + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); + if (arena == NULL) { + *pages_reserved = 0; + _mi_os_free(p, hsize, &_mi_stats_main); + return ENOMEM; + } arena->block_count = bcount; arena->start = (uint8_t*)p; arena->block_bottom = 0; diff --git a/src/os.c b/src/os.c index 85cd1a83..b7bffa64 100644 --- a/src/os.c +++ b/src/os.c @@ -36,8 +36,6 @@ terms of the MIT license. A copy of the license can be found in the file large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). 
----------------------------------------------------------- */ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_is_huge_reserved(void* p); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); @@ -184,7 +182,7 @@ void _mi_os_init() { static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) { - if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) bool err = false; #if defined(_WIN32) err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); @@ -628,7 +626,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ *is_zero = false; size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) int err = 0; if (commit) { _mi_stat_increase(&stats->committed, csize); @@ -684,7 +682,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! @@ -758,9 +756,11 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { size_t csize = 0; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return false; + /* if (_mi_os_is_huge_reserved(addr)) { _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } + */ int err = 0; #ifdef _WIN32 DWORD oldprotect = 0; @@ -816,79 +816,42 @@ will be reused. -----------------------------------------------------------------------------*/ #define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB -typedef struct mi_huge_info_s { - volatile _Atomic(void*) start; // start of huge page area (32TiB) - volatile _Atomic(size_t) reserved; // total reserved size - volatile _Atomic(size_t) used; // currently allocated -} mi_huge_info_t; - -static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) }; - -bool _mi_os_is_huge_reserved(void* p) { - return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL && - p >= mi_atomic_read_ptr(&os_huge_reserved.start) && - (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved)); -} - -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment) -{ - // only allow large aligned allocations (e.g. 
regions) - if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL; - if (try_alignment > MI_SEGMENT_SIZE) return NULL; - if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL; - if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full - - // always aligned - mi_assert_internal(mi_atomic_read(&os_huge_reserved.used) % MI_SEGMENT_SIZE == 0 ); - mi_assert_internal( (uintptr_t)mi_atomic_read_ptr(&os_huge_reserved.start) % MI_SEGMENT_SIZE == 0 ); - - // try to reserve space - size_t base = mi_atomic_addu( &os_huge_reserved.used, size ); - if ((base + size) > os_huge_reserved.reserved) { - // "free" our over-allocation - mi_atomic_subu( &os_huge_reserved.used, size); - return NULL; - } - - // success! - uint8_t* p = (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + base; - mi_assert_internal( (uintptr_t)p % MI_SEGMENT_SIZE == 0 ); - return p; -} - -/* -static void mi_os_free_huge_reserved() { - uint8_t* addr = os_huge_reserved.start; - size_t total = os_huge_reserved.reserved; - os_huge_reserved.reserved = 0; - os_huge_reserved.start = NULL; - for( size_t current = 0; current < total; current += MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(addr + current, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); - } -} -*/ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { +int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** start, size_t* pages_reserved, size_t* size) mi_attr_noexcept { UNUSED(pages); UNUSED(max_secs); + if (start != NULL) *start = NULL; if (pages_reserved != NULL) *pages_reserved = 0; + if (size != NULL) *size = 0; return ENOMEM; } #else -int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +static _Atomic(uintptr_t) huge_top; // = 0 + +int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept { - if (pages_reserved != NULL) *pages_reserved = 0; + *pstart = NULL; + *pages_reserved = 0; + *psize = 0; if (max_secs==0) return ETIMEDOUT; // timeout if (pages==0) return 0; // ok - if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return ETIMEDOUT; // already reserved - // Set the start address after the 32TiB area - uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif + // Atomically claim a huge address range + size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + uint8_t* start; + do { + start = (uint8_t*)mi_atomic_addu(&huge_top, size); + if (start == NULL) { + uintptr_t top = ((uintptr_t)32 << 40); // 32TiB virtual start address + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + top += ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + #endif + mi_atomic_cas_strong(&huge_top, top, 0); + } + } while (start == NULL); + // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if 
it takes too long double start_t = _mi_clock_start(); @@ -925,16 +888,13 @@ int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_r } // success, record it if (page==0) { - mi_atomic_write_ptr(&os_huge_reserved.start, addr); // don't switch the order of these writes - mi_atomic_write(&os_huge_reserved.reserved, MI_HUGE_OS_PAGE_SIZE); + *pstart = addr; } - else { - mi_atomic_addu(&os_huge_reserved.reserved,MI_HUGE_OS_PAGE_SIZE); - } - _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + *psize += MI_HUGE_OS_PAGE_SIZE; + *pages_reserved += 1; + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - if (pages_reserved != NULL) { *pages_reserved = page + 1; } - + // check for timeout double elapsed = _mi_clock_end(start_t); if (elapsed > max_secs) return ETIMEDOUT; @@ -943,7 +903,7 @@ int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_r if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout } } - _mi_verbose_message("reserved %zu huge pages\n", pages); + mi_assert_internal(*psize == size); return 0; } #endif From a6499be074a52232ed131eeabb3bd8040f2743c3 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 19:53:07 -0700 Subject: [PATCH 003/104] initial numa support for arenas --- include/mimalloc-internal.h | 1 + include/mimalloc.h | 8 +- src/arena.c | 128 +++++++++++++----- src/init.c | 2 +- src/options.c | 3 +- src/os.c | 252 +++++++++++++++++++----------------- 6 files changed, 241 insertions(+), 153 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2b881ac9..dd677a02 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -56,6 +56,7 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); +int _mi_os_numa_node(void); // memory.c diff --git a/include/mimalloc.h b/include/mimalloc.h index b63ed79d..b155aca6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -228,9 +228,14 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b // Experimental mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept; + +// deprecated +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + // ------------------------------------------------------ // Convenience // ------------------------------------------------------ @@ -271,6 +276,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_segment_reset, mi_option_os_tag, + mi_option_max_numa_node, _mi_option_last } mi_option_t; diff --git a/src/arena.c b/src/arena.c index 469755f2..5bc3900c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -25,8 +25,10 @@ with on-demand coalescing. 
// os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; +//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize); +int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- Arena allocation @@ -44,6 +46,7 @@ typedef uintptr_t mi_block_info_t; typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? bool is_large; // large OS page allocated _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks @@ -223,7 +226,31 @@ static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_ze Arena Allocation ----------------------------------------------------------- */ -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_zero, + size_t* memid) +{ + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); +#if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo) >= needed_bcount); +#endif + *memid = mi_memid_create(arena_index, block_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + } + return p; +} + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, + bool* commit, bool* large, bool* is_zero, + size_t* memid, mi_os_tld_t* tld) +{ mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; @@ -240,33 +267,36 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* { size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + int numa_node = _mi_os_numa_node(); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + // try numa affine allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); - if (arena==NULL) break; - if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); - if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - #endif - *memid = mi_memid_create(i, block_index); - *commit = true; // TODO: support commit on demand? 
- *large = arena->is_large; - mi_assert_internal((uintptr_t)p % alignment == 0); - return p; - } + if (arena==NULL) break; // end reached + if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; + } + } + // try from another numa node instead.. + for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + if (arena==NULL) break; // end reached + if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; } } } - // fall back to the OS + // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); @@ -350,31 +380,61 @@ static bool mi_arena_add(mi_arena_t* arena) { ----------------------------------------------------------- */ #include // ENOMEM -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { - size_t pages_reserved_default = 0; - if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; +// reserve at a specific numa node +static int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; - void* p = NULL; - int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize); - _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved); - if (p==NULL) return err; - // err might be != 0 but that is fine, we just got less pages. - mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); + if (p==NULL) return ENOMEM; + _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); + size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; - size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); + size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { - *pages_reserved = 0; _mi_os_free(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; arena->start = (uint8_t*)p; arena->block_bottom = 0; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); - //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false)); mi_arena_add(arena); return 0; } + + +// reserve huge pages evenly among all numa nodes. 
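// As the loop below is written: with pages == 10 on a machine with 4 NUMA nodes,
// pages_per == 10/4 == 2, so two 1GiB pages are reserved on each node (8 in total)
// and the 2 remainder pages are left unreserved; when pages is smaller than the
// node count, pages_per is forced to 1 and only the first `pages` nodes receive a
// page. Callers that want explicit placement can call
// mi_reserve_huge_os_pages_at(pages, numa_node) for each node themselves.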
+int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { + if (pages == 0) return 0; + + // pages per numa node + int numa_count = _mi_os_numa_node_count(); + if (numa_count <= 0) numa_count = 1; + size_t pages_per = pages / numa_count; + if (pages_per == 0) pages_per = 1; + + // reserve evenly among numa nodes + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + int err = mi_reserve_huge_os_pages_at((pages_per > pages ? pages : pages_per), numa_node); + if (err) return err; + if (pages < pages_per) { + pages = 0; + } + else { + pages -= pages_per; + } + } + + return 0; +} + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + if (pages_reserved != NULL) *pages_reserved = 0; + int err = mi_reserve_huge_os_pages_interleave(pages); + if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; + return err; +} diff --git a/src/init.c b/src/init.c index e15d82eb..138b54aa 100644 --- a/src/init.c +++ b/src/init.c @@ -435,7 +435,7 @@ static void mi_process_load(void) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) - mi_reserve_huge_os_pages(pages, max_secs, NULL); + mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/options.c b/src/options.c index a49c46ed..32f13d54 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 256, UNINIT, MI_OPTION(max_numa_node) } // maximum allowed numa node }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index b7bffa64..c0564174 100644 --- a/src/os.c +++ b/src/os.c @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = (1UL << 21); // 2MiB + large_os_page_size = 2*MiB; } } #endif @@ -207,31 +207,6 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #ifdef _WIN32 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */ - && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0 - && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0) - && pNtAllocateVirtualMemoryEx != NULL) - { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif - MEM_EXTENDED_PARAMETER param = { 0, 0 }; - param.Type = 5; // == MemExtendedParameterAttributeFlags; - param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - SIZE_T psize = size; - void* base = addr; - 
NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, ¶m, 1); - if (err == 0) { - return base; - } - else { - // else fall back to regular large OS pages - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err); - } - } -#endif #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; @@ -364,7 +339,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % ((uintptr_t)1 << 30)) == 0) { + if ((size % GiB) == 0) { lflags |= MAP_HUGE_1GB; } else @@ -400,10 +375,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead - // transparent huge pages (TPH). It is not required to call `madvise` with MADV_HUGE + // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available // in that case -- in particular for our large regions (in `memory.c`). - // However, some systems only allow TPH if called with explicit `madvise`, so + // However, some systems only allow THP if called with explicit `madvise`, so // when large OS pages are enabled for mimalloc, we call `madvice` anyways. if (allow_large && use_large_os_page(size, try_alignment)) { if (madvise(p, size, MADV_HUGEPAGE) == 0) { @@ -810,101 +785,146 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- -Support for huge OS pages (1Gib) that are reserved up-front and never -released. Only regions are allocated in here (see `memory.c`) so the memory -will be reused. +Support for allocating huge OS pages (1Gib) that are reserved up-front +and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) -----------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB +#define MI_HUGE_OS_PAGE_SIZE (GiB) +#if defined(WIN32) && (MI_INTPTR_SIZE >= 8) +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +{ + mi_assert_internal(size%GiB == 0); -#if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** start, size_t* pages_reserved, size_t* size) mi_attr_noexcept { - UNUSED(pages); UNUSED(max_secs); - if (start != NULL) *start = NULL; - if (pages_reserved != NULL) *pages_reserved = 0; - if (size != NULL) *size = 0; - return ENOMEM; -} -#else -static _Atomic(uintptr_t) huge_top; // = 0 - -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept -{ - *pstart = NULL; - *pages_reserved = 0; - *psize = 0; - if (max_secs==0) return ETIMEDOUT; // timeout - if (pages==0) return 0; // ok - - // Atomically claim a huge address range - size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - uint8_t* start; - do { - start = (uint8_t*)mi_atomic_addu(&huge_top, size); - if (start == NULL) { - uintptr_t top = ((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); - top += ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif - mi_atomic_cas_strong(&huge_top, top, 0); - } - } while (start == NULL); - + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + MEM_EXTENDED_PARAMETER params[4] = { {0,0},{0,0},{0,0},{0,0} }; + MEM_ADDRESS_REQUIREMENTS reqs = {0,0,0}; + reqs.HighestEndingAddress = NULL; + reqs.LowestStartingAddress = NULL; + reqs.Alignment = MI_SEGMENT_SIZE; - // Allocate one page at the time but try to place them contiguously - // We allocate one page at the time to be able to abort if it takes too long - double start_t = _mi_clock_start(); - uint8_t* addr = start; // current top of the allocations - for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) { - // allocate a page - void* p = NULL; - bool is_large = true; - #ifdef _WIN32 - if (page==0) { mi_win_enable_large_os_pages(); } - p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large); - #elif defined(MI_OS_USE_MMAP) - p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large); - #else - // always fail - #endif - - // Did we succeed at a contiguous address? 
- if (p != addr) { - // no success, issue a warning and return with an error - if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main ); - } - else { - #ifdef _WIN32 - int err = GetLastError(); - #else - int err = errno; - #endif - _mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err); - } - return ENOMEM; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + if (pNtAllocateVirtualMemoryEx != NULL) { + #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE + #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) + #endif + params[0].Type = MemExtendedParameterAddressRequirements; + params[0].Pointer = &reqs; + params[1].Type = 5; // == MemExtendedParameterAttributeFlags; + params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + size_t param_count = 2; + if (numa_node >= 0) { + param_count++; + params[2].Type = MemExtendedParameterNumaNode; + params[2].ULong = (unsigned)numa_node; } - // success, record it - if (page==0) { - *pstart = addr; + SIZE_T psize = size; + void* base = NULL; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0) { + return base; } - *psize += MI_HUGE_OS_PAGE_SIZE; - *pages_reserved += 1; - _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); - _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - - // check for timeout - double elapsed = _mi_clock_end(start_t); - if (elapsed > max_secs) return ETIMEDOUT; - if (page >= 1) { - double estimate = ((elapsed / (double)(page+1)) * (double)pages); - if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout + else { + // fall back to regular huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); } } - mi_assert_internal(*psize == size); - return 0; + // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation + if (pVirtualAlloc2 != NULL) { + params[0].Type = MemExtendedParameterAddressRequirements; + params[0].Pointer = &reqs; + size_t param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type = MemExtendedParameterNumaNode; + params[1].ULong = (unsigned)numa_node; + } + return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); + } + #endif + return NULL; // give up on older Windows.. 
+} +#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) +#ifdef MI_HAS_NUMA +#include // mbind, and use -lnuma +#endif +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { + mi_assert_internal(size%GiB == 0); + bool is_large = true; + void* p = mi_unix_mmap(NULL, MI_HUGE_OS_PAGE_SIZE, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (p == NULL) return NULL; + #ifdef MI_HAS_NUMA + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { + uintptr_t numa_mask = (1UL << numa_node); + long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); + } + } + #endif + return p; +} +#else +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { + return NULL; } #endif +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { + if (psize != NULL) *psize = 0; + size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + void* p = mi_os_alloc_huge_os_pagesx(size, numa_node); + if (p==NULL) return NULL; + if (psize != NULL) *psize = size; + _mi_stat_increase(&_mi_stats_main.committed, size); + _mi_stat_increase(&_mi_stats_main.reserved, size); + return p; +} + +#ifdef WIN32 +static int mi_os_numa_nodex(void) { + PROCESSOR_NUMBER pnum; + USHORT numa_node = 0; + GetCurrentProcessorNumberEx(&pnum); + GetNumaProcessorNodeEx(&pnum,&numa_node); + return (int)numa_node; +} + +static int mi_os_numa_node_countx(void) { + ULONG numa_max = 0; + GetNumaHighestNodeNumber(&numa_max); + return (int)(numa_max + 1); +} +#elif MI_HAS_NUMA +#include +static int mi_os_numa_nodex(void) { + return numa_preferred(); +} +static int mi_os_numa_node_countx(void) { + return (numa_max_node() + 1); +} +#else +static int mi_os_numa_nodex(void) { + return 0; +} +static int mi_os_numa_node_countx(void) { + return 1; +} +#endif + +int _mi_os_numa_node_count(void) { + long ncount = mi_os_numa_node_countx(); + // never more than max numa node and at least 1 + long nmax = 1 + mi_option_get(mi_option_max_numa_node); + if (ncount > nmax) ncount = nmax; + if (ncount <= 0) ncount = 1; + return ncount; +} + +int _mi_os_numa_node(void) { + int nnode = mi_os_numa_nodex(); + // never more than the node count + int ncount = _mi_os_numa_node_count(); + if (nnode >= ncount) { nnode = nnode % ncount; } + return nnode; +} From 3fadf4abaf5ee91c38c6e593a1faabb28d9ab2f9 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:01:08 -0700 Subject: [PATCH 004/104] initial numa awareness for regions --- src/memory.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/memory.c b/src/memory.c index 9ab7c850..02e82e4d 100644 --- a/src/memory.c +++ b/src/memory.c @@ -45,10 +45,8 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -bool _mi_os_is_huge_reserved(void* p); +//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +//void _mi_os_free_ex(void* p, size_t size, bool 
was_committed, mi_stats_t* stats); // arena.c void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); @@ -93,7 +91,8 @@ typedef struct mem_region_s { volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd - size_t arena_memid; + volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; @@ -212,6 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (mi_atomic_cas_strong(®ion->info, info, 0)) { // update the region count region->arena_memid = arena_memid; + mi_atomic_write(®ion->numa_node, _mi_os_numa_node() + 1); mi_atomic_increment(®ions_count); } else { @@ -220,6 +220,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; + mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node() + 1); mi_atomic_increment(®ions_count); start = NULL; break; @@ -365,15 +366,18 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks, size_t size, + bool* commit, bool* allow_large, bool* is_zero, + void** p, size_t* id, mi_os_tld_t* tld) { // check if there are available blocks in the region.. mi_assert_internal(idx < MI_REGION_MAX); mem_region_t* region = ®ions[idx]; uintptr_t m = mi_atomic_read_relaxed(®ion->map); - if (m != MI_REGION_MAP_FULL) { // some bits are zero + int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + if ((rnode < 0 || rnode == numa_node) && // fits current numa node + (m != MI_REGION_MAP_FULL)) // and some bits are zero + { bool ok = (*commit || *allow_large); // committing or allow-large is always ok if (!ok) { // otherwise skip incompatible regions if possible. @@ -426,19 +430,20 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks + int numa_node = _mi_os_numa_node(); void* p = NULL; size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error + if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error if (p != NULL) break; } if (p == NULL) { // no free range in existing regions -- try to extend beyond the count.. 
but at most 8 regions for (idx = count; idx < mi_atomic_read_relaxed(®ions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error + if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error if (p != NULL) break; } } From 2d10c78587d6cf781ffb40c24cb727ecff625841 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:19:00 -0700 Subject: [PATCH 005/104] fix linux compilation --- CMakeLists.txt | 1 + src/arena.c | 3 ++- src/init.c | 2 +- src/os.c | 4 +++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 81cc339a..e9eb6feb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c + src/arena.c src/memory.c src/segment.c src/page.c diff --git a/src/arena.c b/src/arena.c index 5bc3900c..bb1c1c10 100644 --- a/src/arena.c +++ b/src/arena.c @@ -381,7 +381,7 @@ static bool mi_arena_add(mi_arena_t* arena) { #include // ENOMEM // reserve at a specific numa node -static int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); if (p==NULL) return ENOMEM; @@ -432,6 +432,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { } int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + UNUSED(max_secs); _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages); diff --git a/src/init.c b/src/init.c index 138b54aa..0813fddd 100644 --- a/src/init.c +++ b/src/init.c @@ -434,7 +434,7 @@ static void mi_process_load(void) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) + // double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/os.c b/src/os.c index c0564174..2bb3ee3c 100644 --- a/src/os.c +++ b/src/os.c @@ -851,7 +851,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; - void* p = mi_unix_mmap(NULL, MI_HUGE_OS_PAGE_SIZE, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { @@ -861,6 +861,8 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } } + #else + UNUSED(numa_node); #endif return p; } From 57dd69265ad294e7cdfcc13ef7ecb69b7c5d61b1 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:30:01 -0700 Subject: [PATCH 006/104] normalize numa node --- src/arena.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arena.c b/src/arena.c index bb1c1c10..381d4486 100644 --- a/src/arena.c +++ 
b/src/arena.c @@ -383,6 +383,8 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; + if (numa_node < -1) numa_node = -1; + if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); if (p==NULL) return ENOMEM; _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); From 2c12d7f2234b25308478e22c9342a07623b6f891 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 22:01:52 -0700 Subject: [PATCH 007/104] optimized numa calls; better Linux support --- CMakeLists.txt | 12 ++++ include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 1 + src/arena.c | 2 +- src/init.c | 3 +- src/memory.c | 6 +- src/os.c | 114 ++++++++++++++++++++++++------------ 7 files changed, 97 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9eb6feb..1e96c237 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.0) project(libmimalloc C CXX) include("cmake/mimalloc-config-version.cmake") +include("CheckIncludeFile") + set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) @@ -88,6 +90,16 @@ if(MI_USE_CXX MATCHES "ON") set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) endif() +CHECK_INCLUDE_FILE("numaif.h" MI_HAVE_NUMA_H) +if(MI_HAVE_NUMA_H) + list(APPEND mi_defines MI_HAS_NUMA) + list(APPEND mi_libraries numa) +else() + if (NOT(WIN32)) + message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)") + endif() +endif() + # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index dd677a02..b4d3351d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -56,7 +56,7 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); -int _mi_os_numa_node(void); +int _mi_os_numa_node(mi_os_tld_t* tld); // memory.c diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..0208d5c7 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -413,6 +413,7 @@ typedef struct mi_segments_tld_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation + int numa_node; // numa node associated with this thread mi_stats_t* stats; // points to tld stats } mi_os_tld_t; diff --git a/src/arena.c b/src/arena.c index 381d4486..7eb755c4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -267,7 +267,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, { size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - int numa_node = _mi_os_numa_node(); // current numa node + int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation diff --git a/src/init.c b/src/init.c index 0813fddd..166ca451 100644 --- a/src/init.c +++ b/src/init.c @@ -99,7 +99,7 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, tld_main_stats }, 
// os + { 0, -1, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -218,6 +218,7 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->os.numa_node = -1; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/memory.c b/src/memory.c index 02e82e4d..a425393c 100644 --- a/src/memory.c +++ b/src/memory.c @@ -211,7 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (mi_atomic_cas_strong(®ion->info, info, 0)) { // update the region count region->arena_memid = arena_memid; - mi_atomic_write(®ion->numa_node, _mi_os_numa_node() + 1); + mi_atomic_write(®ion->numa_node, _mi_os_numa_node(tld) + 1); mi_atomic_increment(®ions_count); } else { @@ -220,7 +220,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node() + 1); + mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); mi_atomic_increment(®ions_count); start = NULL; break; @@ -430,7 +430,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks - int numa_node = _mi_os_numa_node(); + int numa_node = _mi_os_numa_node(tld); void* p = NULL; size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? diff --git a/src/os.c b/src/os.c index 2bb3ee3c..677d0ea2 100644 --- a/src/os.c +++ b/src/os.c @@ -97,7 +97,7 @@ typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T* static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; -static bool mi_win_enable_large_os_pages() +static bool mi_win_enable_large_os_pages() { if (large_os_page_size > 0) return true; @@ -148,10 +148,10 @@ void _mi_os_init(void) { FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); - if (hDll != NULL) { + if (hDll != NULL) { pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); FreeLibrary(hDll); - } + } if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { mi_win_enable_large_os_pages(); } @@ -191,7 +191,7 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats #else err = (munmap(addr, size) == -1); #endif - if (was_committed) _mi_stat_decrease(&stats->committed, size); + if (was_committed) _mi_stat_decrease(&stats->committed, size); _mi_stat_decrease(&stats->reserved, size); if (err) { #pragma warning(suppress:4996) @@ -207,14 +207,14 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #ifdef _WIN32 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if (MI_INTPTR_SIZE >= 8) +#if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) // on 
modern Windows try use VirtualAlloc2 for aligned allocation if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { MEM_ADDRESS_REQUIREMENTS reqs = { 0 }; @@ -232,7 +232,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, mi_assert_internal(!(large_only && !allow_large)); static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; - if ((large_only || use_large_os_page(size, try_alignment)) + if ((large_only || use_large_os_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { @@ -372,7 +372,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro } if (p == NULL) { *is_large = false; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE @@ -391,7 +391,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro } #endif -// On 64-bit systems, we can do efficient aligned allocation by using +// On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) static volatile _Atomic(intptr_t) aligned_base; @@ -785,14 +785,14 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- -Support for allocating huge OS pages (1Gib) that are reserved up-front +Support for allocating huge OS pages (1Gib) that are reserved up-front and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) -----------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE (GiB) +#define MI_HUGE_OS_PAGE_SIZE (GiB) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) -{ +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +{ mi_assert_internal(size%GiB == 0); #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) @@ -802,8 +802,8 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) reqs.HighestEndingAddress = NULL; reqs.LowestStartingAddress = NULL; reqs.Alignment = MI_SEGMENT_SIZE; - - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages if (pNtAllocateVirtualMemoryEx != NULL) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) @@ -825,10 +825,10 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) return base; } else { - // fall back to regular huge pages + // fall back to regular huge pages _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); } - } + } // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation if (pVirtualAlloc2 != NULL) { params[0].Type = MemExtendedParameterAddressRequirements; @@ -842,7 +842,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); } #endif - return NULL; // give up on older Windows.. + return NULL; // give up on older Windows.. } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) #ifdef MI_HAS_NUMA @@ -853,7 +853,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { bool is_large = true; void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - #ifdef MI_HAS_NUMA + #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { uintptr_t numa_mask = (1UL << numa_node); long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); @@ -866,7 +866,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { #endif return p; } -#else +#else static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { return NULL; } @@ -884,12 +884,12 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { } #ifdef WIN32 -static int mi_os_numa_nodex(void) { +static int mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); GetNumaProcessorNodeEx(&pnum,&numa_node); - return (int)numa_node; + return (int)numa_node; } static int mi_os_numa_node_countx(void) { @@ -898,12 +898,42 @@ static int mi_os_numa_node_countx(void) { return (int)(numa_max + 1); } #elif MI_HAS_NUMA -#include +#include +#include +#include static int mi_os_numa_nodex(void) { - return numa_preferred(); + #define MI_MAX_MASK (4) // support at most 256 nodes + unsigned long mask[MI_MAX_MASK]; + memset(mask,0,MI_MAX_MASK*sizeof(long)); + int mode = 0; + long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */); + if (err != 0) return 0; + // find the lowest bit that is set + for(int i = 0; i < MI_MAX_MASK; i++) { + for(int j = 0; j < (int)(sizeof(long)*8); j++) { + if ((mask[i] & (1UL << j)) != 0) { + return (i*sizeof(long)*8 + j); + } + } + } + 
return 0; } + static int mi_os_numa_node_countx(void) { - return (numa_max_node() + 1); + DIR* d = opendir("/sys/devices/system/node"); + if (d==NULL) return 1; + + struct dirent* de; + int max_node_num = 0; + while ((de = readdir(d)) != NULL) { + int node_num; + if (strncmp(de->d_name, "node", 4) == 0) { + node_num = (int)strtol(de->d_name+4, NULL, 0); + if (max_node_num < node_num) max_node_num = node_num; + } + } + closedir(d); + return (max_node_num + 1); } #else static int mi_os_numa_nodex(void) { @@ -915,18 +945,28 @@ static int mi_os_numa_node_countx(void) { #endif int _mi_os_numa_node_count(void) { - long ncount = mi_os_numa_node_countx(); - // never more than max numa node and at least 1 - long nmax = 1 + mi_option_get(mi_option_max_numa_node); - if (ncount > nmax) ncount = nmax; - if (ncount <= 0) ncount = 1; - return ncount; + static int numa_node_count = 0; + if (mi_unlikely(numa_node_count <= 0)) { + int ncount = mi_os_numa_node_countx(); + // never more than max numa node and at least 1 + int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); + if (ncount > nmax) ncount = nmax; + if (ncount <= 0) ncount = 1; + numa_node_count = ncount; + } + mi_assert_internal(numa_node_count >= 1); + return numa_node_count; } -int _mi_os_numa_node(void) { - int nnode = mi_os_numa_nodex(); - // never more than the node count - int ncount = _mi_os_numa_node_count(); - if (nnode >= ncount) { nnode = nnode % ncount; } - return nnode; +int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_unlikely(tld->numa_node < 0)) { + int nnode = mi_os_numa_nodex(); + // never more than the node count + int ncount = _mi_os_numa_node_count(); + if (nnode >= ncount) { nnode = nnode % ncount; } + if (nnode < 0) nnode = 0; + tld->numa_node = nnode; + } + mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count()); + return tld->numa_node; } From a69016c33e5969b07426669b58e6a927c478c308 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 10:30:16 -0700 Subject: [PATCH 008/104] improve and document numa support --- src/os.c | 39 +++++++++++++++++++++++++++++---------- test/main-override.cpp | 2 +- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/os.c b/src/os.c index 677d0ea2..fc89d642 100644 --- a/src/os.c +++ b/src/os.c @@ -854,8 +854,11 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? 
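The Linux path introduced above (patch 007) asks `get_mempolicy` for the preferred node as a bitmask and then scans for the lowest set bit with two nested loops. The same mask-to-index step can be expressed with the GCC/Clang builtin `__builtin_ctzl`; a small stand-alone helper (illustrative only, not mimalloc code, assuming 64-bit `unsigned long` words):

#include <stdio.h>

// Return the index of the lowest set bit over an array of mask words,
// i.e. the numa node reported by a nodemask; 0 if no bit is set.
static int lowest_node(const unsigned long* mask, int nwords) {
  for (int i = 0; i < nwords; i++) {
    if (mask[i] != 0) {
      return (int)(i * sizeof(unsigned long) * 8) + __builtin_ctzl(mask[i]);
    }
  }
  return 0;
}

int main(void) {
  unsigned long mask[4] = { 0, 0x10, 0, 0 };  // bit 4 of word 1 = global bit 68 with 64-bit words
  printf("node %d\n", lowest_node(mask, 4));  // prints 68
  return 0;
}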
+ // see: long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); @@ -883,6 +886,9 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { return p; } +/* ---------------------------------------------------------------------------- +Support NUMA aware allocation +-----------------------------------------------------------------------------*/ #ifdef WIN32 static int mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; @@ -902,6 +908,9 @@ static int mi_os_numa_node_countx(void) { #include #include static int mi_os_numa_nodex(void) { + #define MI_NUMA_NODE_SLOW // too slow, so cache it + // TODO: perhaps use RDTSCP instruction on x64? + // see #define MI_MAX_MASK (4) // support at most 256 nodes unsigned long mask[MI_MAX_MASK]; memset(mask,0,MI_MAX_MASK*sizeof(long)); @@ -945,7 +954,7 @@ static int mi_os_numa_node_countx(void) { #endif int _mi_os_numa_node_count(void) { - static int numa_node_count = 0; + static int numa_node_count = 0; // cache the node count if (mi_unlikely(numa_node_count <= 0)) { int ncount = mi_os_numa_node_countx(); // never more than max numa node and at least 1 @@ -959,14 +968,24 @@ int _mi_os_numa_node_count(void) { } int _mi_os_numa_node(mi_os_tld_t* tld) { + int numa_node; +#ifndef MI_NUMA_NODE_SLOW + UNUSED(tld); + numa_node = mi_os_numa_nodex(); +#else if (mi_unlikely(tld->numa_node < 0)) { - int nnode = mi_os_numa_nodex(); - // never more than the node count - int ncount = _mi_os_numa_node_count(); - if (nnode >= ncount) { nnode = nnode % ncount; } - if (nnode < 0) nnode = 0; - tld->numa_node = nnode; + // Cache the NUMA node of the thread if the call is slow. + // This may not be correct as threads can migrate to another cpu on + // another node -- however, for memory allocation this just means we keep + // using the same 'node id' for its allocations; new OS allocations + // naturally come from the actual node so in practice this may be fine. + tld->numa_node = mi_os_numa_nodex(); } - mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count()); - return tld->numa_node; + numa_node = tld->numa_node +#endif + // never more than the node count and >= 0 + int numa_count = _mi_os_numa_node_count(); + if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } + if (numa_node < 0) numa_node = 0; + return numa_node; } diff --git a/test/main-override.cpp b/test/main-override.cpp index e006ad27..f7a7f1bd 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -24,7 +24,7 @@ public: int main() { - //mi_stats_reset(); // ignore earlier allocations + mi_stats_reset(); // ignore earlier allocations atexit(free_p); void* p1 = malloc(78); void* p2 = mi_malloc_aligned(16,24); From 70748ee1ee1da3e9ad14c2d751623e47cb3fd287 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 10:39:09 -0700 Subject: [PATCH 009/104] fix missing semi colon --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index fc89d642..c41d028f 100644 --- a/src/os.c +++ b/src/os.c @@ -981,7 +981,7 @@ int _mi_os_numa_node(mi_os_tld_t* tld) { // naturally come from the actual node so in practice this may be fine. 
tld->numa_node = mi_os_numa_nodex(); } - numa_node = tld->numa_node + numa_node = tld->numa_node; #endif // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); From fd9d8c85ae40db95feb51da6e5478850bc6722fc Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 11:55:03 -0700 Subject: [PATCH 010/104] change numa support on linux to use getcpu --- include/mimalloc-types.h | 1 - src/init.c | 5 ++--- src/os.c | 45 +++++++++++----------------------------- 3 files changed, 14 insertions(+), 37 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0208d5c7..99b6b22b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -413,7 +413,6 @@ typedef struct mi_segments_tld_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation - int numa_node; // numa node associated with this thread mi_stats_t* stats; // points to tld stats } mi_os_tld_t; diff --git a/src/init.c b/src/init.c index 166ca451..ed15aeba 100644 --- a/src/init.c +++ b/src/init.c @@ -99,8 +99,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, -1, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { @@ -218,7 +218,6 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; - tld->os.numa_node = -1; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/os.c b/src/os.c index c41d028f..8e1b3e91 100644 --- a/src/os.c +++ b/src/os.c @@ -903,29 +903,21 @@ static int mi_os_numa_node_countx(void) { GetNumaHighestNodeNumber(&numa_max); return (int)(numa_max + 1); } -#elif MI_HAS_NUMA +#elif defined(__linux__) #include #include -#include +#include + static int mi_os_numa_nodex(void) { - #define MI_NUMA_NODE_SLOW // too slow, so cache it - // TODO: perhaps use RDTSCP instruction on x64? - // see - #define MI_MAX_MASK (4) // support at most 256 nodes - unsigned long mask[MI_MAX_MASK]; - memset(mask,0,MI_MAX_MASK*sizeof(long)); - int mode = 0; - long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */); +#ifdef SYS_getcpu + unsigned node = 0; + unsigned ncpu = 0; + int err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; - // find the lowest bit that is set - for(int i = 0; i < MI_MAX_MASK; i++) { - for(int j = 0; j < (int)(sizeof(long)*8); j++) { - if ((mask[i] & (1UL << j)) != 0) { - return (i*sizeof(long)*8 + j); - } - } - } - return 0; + return (int)node; +#else + return 0; +#endif } static int mi_os_numa_node_countx(void) { @@ -968,21 +960,8 @@ int _mi_os_numa_node_count(void) { } int _mi_os_numa_node(mi_os_tld_t* tld) { - int numa_node; -#ifndef MI_NUMA_NODE_SLOW UNUSED(tld); - numa_node = mi_os_numa_nodex(); -#else - if (mi_unlikely(tld->numa_node < 0)) { - // Cache the NUMA node of the thread if the call is slow. - // This may not be correct as threads can migrate to another cpu on - // another node -- however, for memory allocation this just means we keep - // using the same 'node id' for its allocations; new OS allocations - // naturally come from the actual node so in practice this may be fine. 
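For reference, the `SYS_getcpu` lookup that this patch switches to can be exercised on its own; a minimal Linux-only sketch (a failed syscall is treated as node 0, matching the fallback above; the helper name is illustrative):

#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <stdio.h>

// Ask the kernel on which CPU and NUMA node the calling thread currently runs.
static int current_numa_node(void) {
#ifdef SYS_getcpu
  unsigned cpu = 0;
  unsigned node = 0;
  if (syscall(SYS_getcpu, &cpu, &node, NULL) != 0) return 0;
  return (int)node;
#else
  return 0;
#endif
}

int main(void) {
  printf("running on numa node %d\n", current_numa_node());
  return 0;
}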
- tld->numa_node = mi_os_numa_nodex(); - } - numa_node = tld->numa_node; -#endif + int numa_node = mi_os_numa_nodex(); // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } From ee323aabac42ab4333e40cedd02f0eb1d4356b4e Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 15:56:21 -0700 Subject: [PATCH 011/104] fix vs2017 build --- ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 +++ ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 +++ src/os.c | 5 +++-- 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 511c0fab..1fc70b33 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 6ac0c0b5..75a8e032 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -70,5 +70,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 6147c349..484c4db8 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index a2b64314..598b8643 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -53,6 +53,9 @@ Source Files + + Source Files + diff --git a/src/os.c b/src/os.c index 8e1b3e91..4aa4abf3 100644 --- a/src/os.c +++ b/src/os.c @@ -794,6 +794,7 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); + mi_win_enable_large_os_pages(); #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; @@ -812,7 +813,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) params[0].Pointer = &reqs; params[1].Type = 5; // == MemExtendedParameterAttributeFlags; params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - size_t param_count = 2; + ULONG param_count = 2; if (numa_node >= 0) { param_count++; params[2].Type = MemExtendedParameterNumaNode; @@ -833,7 +834,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) if (pVirtualAlloc2 != NULL) { params[0].Type = MemExtendedParameterAddressRequirements; params[0].Pointer = &reqs; - size_t param_count = 1; + ULONG param_count = 1; if (numa_node >= 0) { param_count++; params[1].Type = MemExtendedParameterNumaNode; From 62cd0237fc8583f357fe4599889011f845690af1 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 17:49:34 -0700 Subject: [PATCH 012/104] fix aligned huge page allocation on windows --- src/arena.c | 2 +- src/os.c | 118 +++++++++++++++++++++++++++++++++------------------- 2 files changed, 76 insertions(+), 44 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7eb755c4..56b09859 100644 --- a/src/arena.c +++ b/src/arena.c @@ -435,7 +435,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { UNUSED(max_secs); - _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; diff --git a/src/os.c b/src/os.c index 4aa4abf3..e1dc31f8 100644 --- a/src/os.c +++ b/src/os.c @@ -791,68 +791,68 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #define MI_HUGE_OS_PAGE_SIZE (GiB) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); + mi_assert_internal(addr != NULL); + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + mi_win_enable_large_os_pages(); - + + void* p = NULL; #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; - MEM_EXTENDED_PARAMETER params[4] = { {0,0},{0,0},{0,0},{0,0} }; - MEM_ADDRESS_REQUIREMENTS reqs = {0,0,0}; - reqs.HighestEndingAddress = NULL; - reqs.LowestStartingAddress = NULL; - reqs.Alignment = MI_SEGMENT_SIZE; - + MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages if (pNtAllocateVirtualMemoryEx != NULL) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) #endif - params[0].Type = MemExtendedParameterAddressRequirements; - params[0].Pointer = &reqs; - params[1].Type = 5; // == MemExtendedParameterAttributeFlags; - params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - ULONG param_count = 2; - if (numa_node >= 0) { - param_count++; - params[2].Type = MemExtendedParameterNumaNode; - params[2].ULong = (unsigned)numa_node; - } - SIZE_T psize = size; - void* base = NULL; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); - if (err == 0) { - return base; - } - else { - // fall back to regular huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); - } - } - // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation - if (pVirtualAlloc2 != NULL) { - params[0].Type = MemExtendedParameterAddressRequirements; - params[0].Pointer = &reqs; + params[0].Type = 5; // == MemExtendedParameterAttributeFlags; + params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; if (numa_node >= 0) { param_count++; params[1].Type = MemExtendedParameterNumaNode; params[1].ULong = (unsigned)numa_node; } - return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); + SIZE_T psize = size; + void* base = addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type = MemExtendedParameterNumaNode; + params[0].ULong = (unsigned)numa_node; + p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + } + else #endif - return NULL; // give up on older Windows.. 
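Further down, this patch starts claiming a fixed, `MI_SEGMENT_SIZE`-aligned virtual address range for each huge-page reservation by advancing a shared cursor with a compare-and-swap. A stand-alone model of that claim using C11 atomics (64-bit address space assumed; the 32TiB start address comes from the hunk below, while the function and variable names here are illustrative):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define GiB ((uintptr_t)1 << 30)

static _Atomic(uintptr_t) huge_cursor;  // 0 until first use

// Threads race to claim disjoint address ranges by bumping a shared cursor;
// the returned range is only an address reservation, nothing is mapped here.
static uintptr_t claim_range(uintptr_t size) {
  uintptr_t start, end, expected;
  do {
    start = expected = atomic_load_explicit(&huge_cursor, memory_order_relaxed);
    if (start == 0) start = (uintptr_t)32 << 40;  // begin after the 32TiB mark
    end = start + size;
  } while (!atomic_compare_exchange_strong(&huge_cursor, &expected, end));
  return start;  // [start, start+size) now belongs to this caller
}

int main(void) {
  printf("claimed %#llx\n", (unsigned long long)claim_range(4*GiB));
  printf("claimed %#llx\n", (unsigned long long)claim_range(2*GiB));
  return 0;
}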
+ // use regular virtual alloc on older windows + { + p = VirtualAlloc(addr, size, flags, PAGE_READWRITE); + } + + if (p == NULL) { + _mi_warning_message("failed to allocate huge OS pages (size %zu) (error %d)\n", size, GetLastError()); + } + return p; } + #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) #ifdef MI_HAS_NUMA #include // mbind, and use -lnuma #endif -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; - void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes @@ -871,19 +871,51 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { return p; } #else -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { return NULL; } #endif +// To ensure proper alignment, use our own area for huge OS pages +static _Atomic(uintptr_t) mi_huge_start; // = 0 + +// Allocate MI_SEGMENT_SIZE aligned huge pages void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { if (psize != NULL) *psize = 0; - size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - void* p = mi_os_alloc_huge_os_pagesx(size, numa_node); - if (p==NULL) return NULL; - if (psize != NULL) *psize = size; + const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + + // Find a new aligned address for the huge pages + uintptr_t start = 0; + uintptr_t end = 0; + uintptr_t expected; + do { + start = expected = mi_atomic_read_relaxed(&mi_huge_start); + if (start == 0) { + // Initialize the start address after the 32TiB area + start = ((uintptr_t)32 << 40); // 32TiB virtual start address + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + #endif + } + end = start + size; + mi_assert_internal(end % MI_SEGMENT_SIZE == 0); + } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); + + // And allocate + void* p = mi_os_alloc_huge_os_pagesx((void*)start, size, numa_node); + if (p == NULL) { + return NULL; + } _mi_stat_increase(&_mi_stats_main.committed, size); _mi_stat_increase(&_mi_stats_main.reserved, size); + if ((uintptr_t)p % MI_SEGMENT_SIZE != 0) { // must be aligned + _mi_warning_message("huge page area was not aligned\n"); + _mi_os_free(p,size,&_mi_stats_main); + return NULL; + } + + if (psize != NULL) *psize = size; return p; } From 723fbba2596e663b6dac40da5e486c0ac52501f3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 12:18:20 -0800 Subject: [PATCH 013/104] fix output during preloading enabling stderr only after the crt has loaded --- src/options.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/options.c b/src/options.c index 32f13d54..3a7833a2 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // toupper #include +static void mi_add_stderr_output(); + int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } @@ -73,7 +75,9 @@ static mi_option_desc_t options[_mi_option_last] = static void mi_option_init(mi_option_desc_t* desc); void _mi_options_init(void) { - // called on process load + // called on process load; should not be called before the CRT is initialized! + // (e.g. do not call this from process_init as that may run before CRT initialization) + mi_add_stderr_output(); // now it safe to use stderr for output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; mi_option_get(option); // initialize @@ -135,7 +139,7 @@ static void mi_out_stderr(const char* msg) { #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. - _cputs(msg); + if (!_mi_preloading()) { _cputs(msg); } #else fputs(msg, stderr); #endif @@ -166,23 +170,29 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { if (out==NULL) return; - // claim all (no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, MI_MAX_DELAY_OUTPUT); + // claim (if `no_more_buf == true`, no more output will be added after this point) + size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; out(out_buf); + if (!no_more_buf) { + out_buf[count] = '\n'; // if continue with the buffer, insert a newline + } } -// The initial default output, outputs to stderr and the delayed output buffer. + +// Once this module is loaded, switch to this routine +// which outputs to stderr and the delayed output buffer. static void mi_out_buf_stderr(const char* msg) { mi_out_stderr(msg); mi_out_buf(msg); } + // -------------------------------------------------------- // Default output handler // -------------------------------------------------------- @@ -194,14 +204,19 @@ static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* mi_out_get_default(void) { mi_output_fun* out = mi_out_default; - return (out == NULL ? &mi_out_buf_stderr : out); + return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out); // output the delayed output now + if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now } +// add stderr to the delayed output after the module is loaded +static void mi_add_stderr_output() { + mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output +} // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. 
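The hunks above buffer early messages in a fixed array, reserving space with an atomic add, and flush that buffer to stderr once the C runtime is usable. A compact stand-alone model of the scheme (the buffer size, names, and the demo in `main` are illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

#define BUF_SIZE 1024
static char delay_buf[BUF_SIZE + 64];
static _Atomic size_t delay_len;

// Append a message by atomically reserving a slice of the buffer.
static void out_delayed(const char* msg) {
  size_t n = strlen(msg);
  size_t start = atomic_fetch_add(&delay_len, n);
  if (start >= BUF_SIZE) return;                   // buffer full: drop the message
  if (start + n > BUF_SIZE) n = BUF_SIZE - start;  // truncate at the end
  memcpy(&delay_buf[start], msg, n);
}

// Flush the buffered output to stderr; claiming the whole buffer makes
// later writers see it as full so they stop appending.
static void flush_delayed(void) {
  size_t count = atomic_fetch_add(&delay_len, (size_t)BUF_SIZE);
  if (count > BUF_SIZE) count = BUF_SIZE;
  delay_buf[count] = 0;
  fputs(delay_buf, stderr);
}

int main(void) {
  out_delayed("early message 1\n");  // e.g. produced while preloading
  out_delayed("early message 2\n");
  flush_delayed();                   // both appear once stderr is safe to use
  return 0;
}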
@@ -214,7 +229,7 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT static mi_decl_thread bool recurse = false; void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { - if (_mi_preloading() || recurse) return; + if (recurse) return; if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); recurse = true; if (prefix != NULL) out(prefix); @@ -228,7 +243,7 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; - if (_mi_preloading() || recurse) return; + if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; From e32048879183c2672db7d06138ca6f4eb80ebfa1 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 12:18:32 -0800 Subject: [PATCH 014/104] add numa nodes to stats --- include/mimalloc-internal.h | 2 +- src/os.c | 7 +++++-- src/stats.c | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index b4d3351d..c28cf0fd 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -57,7 +57,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); int _mi_os_numa_node(mi_os_tld_t* tld); - +int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); diff --git a/src/os.c b/src/os.c index e1dc31f8..af3c440c 100644 --- a/src/os.c +++ b/src/os.c @@ -840,7 +840,8 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } if (p == NULL) { - _mi_warning_message("failed to allocate huge OS pages (size %zu) (error %d)\n", size, GetLastError()); + DWORD winerr = GetLastError(); + _mi_warning_message("failed to allocate huge OS pages (size %zu) (windows error %d%s)\n", size, winerr, (winerr==1450 ? 
" (insufficient resources)" : "")); } return p; } @@ -981,12 +982,14 @@ static int mi_os_numa_node_countx(void) { int _mi_os_numa_node_count(void) { static int numa_node_count = 0; // cache the node count if (mi_unlikely(numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); + int ncount = mi_os_numa_node_countx(); + int ncount0 = ncount; // never more than max numa node and at least 1 int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; numa_node_count = ncount; + _mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0); } mi_assert_internal(numa_node_count >= 1); return numa_node_count; diff --git a/src/stats.c b/src/stats.c index 50bd029d..79362cc4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -265,7 +265,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_stat_counter_print(&stats->commit_calls, "commits", out); mi_stat_print(&stats->threads, "threads", -1, out); mi_stat_counter_print_avg(&stats->searches, "searches", out); - + _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); double user_time; From f36ec5d9d8275777e05526468524dfd9d433164e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:16:07 -0800 Subject: [PATCH 015/104] reserve huge pages incrementally --- src/arena.c | 23 ++++++---- src/options.c | 1 - src/os.c | 120 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 96 insertions(+), 48 deletions(-) diff --git a/src/arena.c b/src/arena.c index 56b09859..24fd2114 100644 --- a/src/arena.c +++ b/src/arena.c @@ -27,7 +27,10 @@ with on-demand coalescing. void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); //int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize); + +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize); +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); + int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- @@ -234,12 +237,12 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); if (p != NULL) { mi_assert_internal(block_index != SIZE_MAX); -#if MI_DEBUG>=1 + #if MI_DEBUG>=1 _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; mi_block_info_t binfo = mi_atomic_read(block); mi_assert_internal(mi_block_is_in_use(binfo)); mi_assert_internal(mi_block_count(binfo) >= needed_bcount); -#endif + #endif *memid = mi_memid_create(arena_index, block_index); *commit = true; // TODO: support commit on demand? 
*large = arena->is_large; @@ -382,18 +385,22 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { - size_t hsize = 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); - if (p==NULL) return ENOMEM; - _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); + size_t hsize = 0; + size_t pages_reserved = 0; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, (double)pages / 2.0, &pages_reserved, &hsize); + if (p==NULL || pages_reserved==0) { + _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); + return ENOMEM; + } + _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { - _mi_os_free(p, hsize, &_mi_stats_main); + _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; diff --git a/src/options.c b/src/options.c index 3a7833a2..11d12187 100644 --- a/src/options.c +++ b/src/options.c @@ -221,7 +221,6 @@ static void mi_add_stderr_output() { // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. // -------------------------------------------------------- -#define MAX_ERROR_COUNT (10) static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings // When overriding malloc, we may recurse into mi_vfprintf if an allocation diff --git a/src/os.c b/src/os.c index af3c440c..5947333d 100644 --- a/src/os.c +++ b/src/os.c @@ -339,7 +339,8 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % GiB) == 0) { + static bool mi_huge_pages_available = true; + if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else @@ -358,6 +359,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); @@ -799,11 +801,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) mi_win_enable_large_os_pages(); - void* p = NULL; #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if (pNtAllocateVirtualMemoryEx != NULL) { + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) #endif @@ -822,7 +824,8 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) return base; } else { - // fall back to regular huge pages + // fall back to 
regular large pages + mi_huge_pages_available = false; // don't try further huge pages _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); } } @@ -830,20 +833,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type = MemExtendedParameterNumaNode; params[0].ULong = (unsigned)numa_node; - p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - else #endif - // use regular virtual alloc on older windows - { - p = VirtualAlloc(addr, size, flags, PAGE_READWRITE); - } - - if (p == NULL) { - DWORD winerr = GetLastError(); - _mi_warning_message("failed to allocate huge OS pages (size %zu) (windows error %d%s)\n", size, winerr, (winerr==1450 ? " (insufficient resources)" : "")); - } - return p; + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) @@ -880,44 +874,92 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 -// Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { - if (psize != NULL) *psize = 0; +// Claim an aligned address range for huge pages +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - // Find a new aligned address for the huge pages uintptr_t start = 0; uintptr_t end = 0; uintptr_t expected; do { - start = expected = mi_atomic_read_relaxed(&mi_huge_start); + start = expected = mi_atomic_read_relaxed(&mi_huge_start); if (start == 0) { // Initialize the start address after the 32TiB area - start = ((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + start = ((uintptr_t)32 << 40); // 32TiB virtual start address +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif +#endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); - // And allocate - void* p = mi_os_alloc_huge_os_pagesx((void*)start, size, numa_node); - if (p == NULL) { - return NULL; - } - _mi_stat_increase(&_mi_stats_main.committed, size); - _mi_stat_increase(&_mi_stats_main.reserved, size); - if ((uintptr_t)p % MI_SEGMENT_SIZE != 0) { // must be aligned - _mi_warning_message("huge page area was not aligned\n"); - _mi_os_free(p,size,&_mi_stats_main); - return NULL; - } + if (total_size != NULL) *total_size = size; + return (uint8_t*)start; +} + +// Allocate MI_SEGMENT_SIZE aligned huge pages +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize) { + if (psize != NULL) *psize = 0; + if (pages_reserved != NULL) *pages_reserved = 0; + size_t 
size = 0; + uint8_t* start = mi_os_claim_huge_pages(pages, &size); - if (psize != NULL) *psize = size; - return p; + // Allocate one page at the time but try to place them contiguously + // We allocate one page at the time to be able to abort if it takes too long + // or to at least allocate as many as available on the system. + double start_t = _mi_clock_start(); + size_t page; + for (page = 0; page < pages; page++) { + // allocate a page + bool is_large = true; + void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); + void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + + // Did we succeed at a contiguous address? + if (p != addr) { + // no success, issue a warning and break + if (p != NULL) { + _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); + _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + } + break; + } + + // success, record it + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); + + // check for timeout + double elapsed = _mi_clock_end(start_t); + if (page >= 1) { + double estimate = ((elapsed / (double)(page+1)) * (double)pages); + if (estimate > 1.5*max_secs) { // seems like we are going to timeout, break + elapsed = max_secs + 1.0; + } + } + if (elapsed > max_secs) { + _mi_warning_message("huge page allocation timed out\n"); + break; + } + } + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + if (pages_reserved != NULL) *pages_reserved = page; + if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; + return (page == 0 ? NULL : start); +} + +// free every huge page in a range individually (as we allocated per page) +// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { + if (p==NULL || size==0) return; + uint8_t* base = (uint8_t*)p; + while (size >= MI_HUGE_OS_PAGE_SIZE) { + _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + size -= MI_HUGE_OS_PAGE_SIZE; + } } /* ---------------------------------------------------------------------------- From 520a8dafee0747e1da8b220b28b35298f10512b2 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:28 -0800 Subject: [PATCH 016/104] divide huge pages more even --- src/arena.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index 24fd2114..95a102d1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -385,6 +385,7 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { + if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; @@ -422,18 +423,20 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { // pages per numa node int numa_count = _mi_os_numa_node_count(); if (numa_count <= 0) numa_count = 1; - size_t pages_per = pages / numa_count; - if (pages_per == 0) pages_per = 1; + const size_t pages_per = pages / numa_count; + const size_t pages_mod = pages % numa_count; // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { - int err = mi_reserve_huge_os_pages_at((pages_per > pages ? 
pages : pages_per), numa_node); + size_t node_pages = pages_per; // can be 0 + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); if (err) return err; - if (pages < pages_per) { + if (pages < node_pages) { pages = 0; } else { - pages -= pages_per; + pages -= node_pages; } } From d1d65fbca4d037c5b9cc0838074804fde1f505c7 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:41 -0800 Subject: [PATCH 017/104] make max error messages configurable --- include/mimalloc.h | 1 + src/options.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b155aca6..c03ddc1e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -277,6 +277,7 @@ typedef enum mi_option_e { mi_option_segment_reset, mi_option_os_tag, mi_option_max_numa_node, + mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 11d12187..63b1612a 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include +static uintptr_t mi_max_error_count = 16; // stop outputting errors after this + static void mi_add_stderr_output(); int mi_version(void) mi_attr_noexcept { @@ -69,7 +71,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_node) } // maximum allowed numa node + { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); @@ -86,6 +89,7 @@ void _mi_options_init(void) { _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } } + mi_max_error_count = mi_option_get(mi_option_max_errors); } long mi_option_get(mi_option_t option) { @@ -275,7 +279,7 @@ void _mi_verbose_message(const char* fmt, ...) { void _mi_error_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); @@ -285,7 +289,7 @@ void _mi_error_message(const char* fmt, ...) { void _mi_warning_message(const char* fmt, ...) 
{ if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); From 9d6a5acb228db9cd4ae8f50ef2295e9b5d57e3c8 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:34:54 -0800 Subject: [PATCH 018/104] fix unix build warnings --- CMakeLists.txt | 5 +++-- src/arena.c | 2 +- src/heap.c | 2 +- src/os.c | 1 - src/page.c | 2 +- src/segment.c | 6 ++++-- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e96c237..12540f68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.0) project(libmimalloc C CXX) -include("cmake/mimalloc-config-version.cmake") -include("CheckIncludeFile") set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) @@ -15,6 +13,9 @@ option(MI_SECURE "Use security mitigations (like guard pages and rand option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) +include("cmake/mimalloc-config-version.cmake") +include("CheckIncludeFile") + set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources diff --git a/src/arena.c b/src/arena.c index 95a102d1..08a36415 100644 --- a/src/arena.c +++ b/src/arena.c @@ -429,7 +429,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; + if ((size_t)numa_node < pages_mod) node_pages++; int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); if (err) return err; if (pages < node_pages) { diff --git a/src/heap.c b/src/heap.c index 15c5d02a..162cf406 100644 --- a/src/heap.c +++ b/src/heap.c @@ -45,7 +45,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>1 +#if MI_DEBUG>=3 static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); diff --git a/src/os.c b/src/os.c index 5947333d..3f299362 100644 --- a/src/os.c +++ b/src/os.c @@ -914,7 +914,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, s size_t page; for (page = 0; page < pages; page++) { // allocate a page - bool is_large = true; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); diff --git a/src/page.c b/src/page.c index f7fad764..32b68edb 100644 --- a/src/page.c +++ b/src/page.c @@ -38,7 +38,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { size_t count = 0; while (head != NULL) { diff --git a/src/segment.c b/src/segment.c index dcc6a04b..178e0eda 100644 --- a/src/segment.c +++ b/src/segment.c @@ -41,7 +41,7 @@ terms of the MIT license. 
A copy of the license can be found in the file ----------------------------------------------------------- */ -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_segment_t* list = queue->first; @@ -111,7 +111,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t Invariant checking ----------------------------------------------------------- */ -#if (MI_DEBUG > 1) +#if (MI_DEBUG>=2) static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); @@ -120,7 +120,9 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } return in_queue; } +#endif +#if (MI_DEBUG>=3) static size_t mi_segment_pagesize(mi_segment_t* segment) { return ((size_t)1 << segment->page_shift); } From 8afd06b248f6a82763292821bf5096e35f6a5a0b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 08:44:40 -0800 Subject: [PATCH 019/104] use int64 for time (instead of double) --- include/mimalloc-internal.h | 7 ++- src/arena.c | 4 +- src/memory.c | 1 + src/os.c | 22 ++++++--- src/stats.c | 95 ++++++++++++++++++------------------- 5 files changed, 70 insertions(+), 59 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index c28cf0fd..413f76e6 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -106,8 +106,11 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -double _mi_clock_end(double start); -double _mi_clock_start(void); + +typedef int64_t mi_msecs_t; +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` diff --git a/src/arena.c b/src/arena.c index 08a36415..6faf7d3e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,7 +28,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar //int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); int _mi_os_numa_node_count(void); @@ -390,7 +390,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, (double)pages / 2.0, &pages_reserved, &hsize); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; diff --git a/src/memory.c b/src/memory.c index a425393c..75a1df92 100644 --- a/src/memory.c +++ b/src/memory.c @@ -564,6 +564,7 @@ void _mi_mem_collect(mi_stats_t* stats) { } } + /* 
---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ diff --git a/src/os.c b/src/os.c index 3f299362..44ef9830 100644 --- a/src/os.c +++ b/src/os.c @@ -871,6 +871,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #endif +#if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 @@ -899,18 +900,25 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (total_size != NULL) *total_size = size; return (uint8_t*)start; } +#else +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; + return NULL; +} +#endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); + if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. - double start_t = _mi_clock_start(); + mi_msecs_t start_t = _mi_clock_start(); size_t page; for (page = 0; page < pages; page++) { // allocate a page @@ -932,14 +940,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, s _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout - double elapsed = _mi_clock_end(start_t); + mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - double estimate = ((elapsed / (double)(page+1)) * (double)pages); - if (estimate > 1.5*max_secs) { // seems like we are going to timeout, break - elapsed = max_secs + 1.0; + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; } } - if (elapsed > max_secs) { + if (elapsed > max_msecs) { _mi_warning_message("huge page allocation timed out\n"); break; } diff --git a/src/stats.c b/src/stats.c index 79362cc4..a1248043 100644 --- a/src/stats.c +++ b/src/stats.c @@ -231,9 +231,9 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin #endif -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept { +static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) mi_attr_noexcept { mi_print_header(out); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; @@ -266,16 +266,16 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_stat_print(&stats->threads, "threads", -1, out); mi_stat_counter_print_avg(&stats->searches, "searches", out); _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - if (secs >= 0.0) 
_mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); + if (elapsed > 0) _mi_fprintf(out, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - double user_time; - double sys_time; + mi_msecs_t user_time; + mi_msecs_t sys_time; size_t peak_rss; size_t page_faults; size_t page_reclaim; size_t peak_commit; mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out,"%10s: user: %.3f s, system: %.3f s, faults: %lu, reclaims: %lu, rss: ", "process", user_time, sys_time, (unsigned long)page_faults, (unsigned long)page_reclaim ); + _mi_fprintf(out,"%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); if (peak_commit > 0) { _mi_fprintf(out,", commit charge: "); @@ -284,9 +284,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) _mi_fprintf(out,"\n"); } -double _mi_clock_end(double start); -double _mi_clock_start(void); -static double mi_time_start = 0.0; +static mi_msecs_t mi_time_start; // = 0 static mi_stats_t* mi_stats_get_default(void) { mi_heap_t* heap = mi_heap_get_default(); @@ -316,71 +314,72 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } -static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) { +static void mi_stats_print_ex(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) { mi_stats_merge_from(stats); - _mi_stats_print(&_mi_stats_main, secs, out); + _mi_stats_print(&_mi_stats_main, elapsed, out); } void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { - mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out); + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + mi_stats_print_ex(mi_stats_get_default(),elapsed,out); } void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { - _mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out); + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + _mi_stats_print(mi_stats_get_default(), elapsed, out); } - -// -------------------------------------------------------- -// Basic timer for convenience -// -------------------------------------------------------- - +// ---------------------------------------------------------------- +// Basic timer for convenience; use milli-seconds to avoid doubles +// ---------------------------------------------------------------- #ifdef _WIN32 #include -static double mi_to_seconds(LARGE_INTEGER t) { - static double freq = 0.0; - if (freq <= 0.0) { +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { LARGE_INTEGER f; QueryPerformanceFrequency(&f); - freq = (double)(f.QuadPart); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; } - return ((double)(t.QuadPart) / freq); + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); } -static double mi_clock_now(void) { +mi_msecs_t _mi_clock_now(void) { LARGE_INTEGER t; QueryPerformanceCounter(&t); - return mi_to_seconds(t); + return mi_to_msecs(t); } #else #include #ifdef CLOCK_REALTIME -static double mi_clock_now(void) { +mi_msecs_t _mi_clock_now(void) { struct timespec t; clock_gettime(CLOCK_REALTIME, &t); - return (double)t.tv_sec + (1.0e-9 * (double)t.tv_nsec); + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); } #else // low 
resolution timer -static double mi_clock_now(void) { - return ((double)clock() / (double)CLOCKS_PER_SEC); +mi_msecs_t _mi_clock_now(void) { + return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); } #endif #endif -static double mi_clock_diff = 0.0; +static mi_msecs_t mi_clock_diff; -double _mi_clock_start(void) { +mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 0.0) { - double t0 = mi_clock_now(); - mi_clock_diff = mi_clock_now() - t0; + mi_msecs_t t0 = _mi_clock_now(); + mi_clock_diff = _mi_clock_now() - t0; } - return mi_clock_now(); + return _mi_clock_now(); } -double _mi_clock_end(double start) { - double end = mi_clock_now(); +mi_msecs_t _mi_clock_end(mi_msecs_t start) { + mi_msecs_t end = _mi_clock_now(); return (end - start - mi_clock_diff); } @@ -394,21 +393,21 @@ double _mi_clock_end(double start) { #include #pragma comment(lib,"psapi.lib") -static double filetime_secs(const FILETIME* ftime) { +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; i.LowPart = ftime->dwLowDateTime; i.HighPart = ftime->dwHighDateTime; - double secs = (double)(i.QuadPart) * 1.0e-7; // FILETIME is in 100 nano seconds - return secs; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; } -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { FILETIME ct; FILETIME ut; FILETIME st; FILETIME et; GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_secs(&ut); - *stime = filetime_secs(&st); + *utime = filetime_msecs(&ut); + *stime = filetime_msecs(&st); PROCESS_MEMORY_COUNTERS info; GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); @@ -427,11 +426,11 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size #include #endif -static double timeval_secs(const struct timeval* tv) { - return (double)tv->tv_sec + ((double)tv->tv_usec * 1.0e-6); +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); #if defined(__APPLE__) && defined(__MACH__) @@ -452,12 +451,12 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size #pragma message("define a way to get process info") #endif -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { *peak_rss = 0; *page_faults = 0; *page_reclaim = 0; *peak_commit = 0; - *utime = 0.0; - *stime = 0.0; + *utime = 0; + *stime = 0; } #endif From 3d0a1e249fa113e93792838a00a7acd9fc98aa34 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 09:40:10 -0800 Subject: [PATCH 020/104] remove all floating point types and arithmetic --- src/arena.c | 1 - src/init.c | 3 +-- src/stats.c | 32 +++++++++++++++++++------------- 3 
files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/arena.c b/src/arena.c index 6faf7d3e..e58d2c47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -25,7 +25,6 @@ with on-demand coalescing. // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); diff --git a/src/init.c b/src/init.c index ed15aeba..ef848de4 100644 --- a/src/init.c +++ b/src/init.c @@ -433,8 +433,7 @@ static void mi_process_load(void) { } if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - // double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/stats.c b/src/stats.c index a1248043..011fab64 100644 --- a/src/stats.c +++ b/src/stats.c @@ -130,19 +130,23 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? " " : "b"); - double base = (unit == 0 ? 1000.0 : 1024.0); + const int64_t base = (unit == 0 ? 1000 : 1024); if (unit>0) n *= unit; - double pos = (double)(n < 0 ? -n : n); - if (pos < base) - snprintf(buf,len, "%d %s ", (int)n, suffix); - else if (pos < base*base) - snprintf(buf, len, "%.1f k%s", (double)n / base, suffix); - else if (pos < base*base*base) - snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix); - else - snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix); - + const int64_t pos = (n < 0 ? -n : n); + if (pos < base) { + snprintf(buf, len, "%d %s ", (int)n, suffix); + } + else { + int64_t divider = base; + const char* magnitude = "k"; + if (pos >= divider*base) { divider *= base; magnitude = "m"; } + if (pos >= divider*base) { divider *= base; magnitude = "g"; } + const int64_t tens = (n / (divider/10)); + const long whole = (long)(tens/10); + const long frac1 = (long)(tens%10); + snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); + } _mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf); } @@ -199,8 +203,10 @@ static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg } static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { - double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); - _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); + const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); + const long avg_whole = (long)(avg_tens/10); + const long avg_frac1 = (long)(avg_tens%10); + _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); } From 829fd872f407c5e201cd844b8f26f2c87915e89b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 11:48:41 -0800 Subject: [PATCH 021/104] initial delay slots --- include/mimalloc-internal.h | 11 ++- include/mimalloc-types.h | 26 +++++-- include/mimalloc.h | 1 + src/heap.c | 2 +- src/init.c | 4 +- src/memory.c | 143 +++++++++++++++++++++++++++++++----- src/options.c | 1 + src/segment.c | 31 ++++---- src/stats.c | 2 +- 9 files changed, 171 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 413f76e6..25a3d93d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -61,15 +61,15 @@ int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); -void _mi_mem_collect(mi_stats_t* stats); +void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -107,7 +107,6 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -typedef int64_t mi_msecs_t; mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..8a3ffff4 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -385,6 +385,19 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) +// ------------------------------------------------------ +// Delay slots (to avoid expensive OS calls) +// ------------------------------------------------------ +typedef int64_t mi_msecs_t; + +typedef struct mi_delay_slot_s { + mi_msecs_t expire; + uint8_t* addr; + size_t size; +} mi_delay_slot_t; + +#define MI_RESET_DELAY_SLOTS (128) + // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ @@ -395,6 +408,12 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats + mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; +} mi_os_tld_t; // Segments thread local data typedef struct mi_segments_tld_s { @@ -408,14 +427,9 @@ typedef struct mi_segments_tld_s { size_t cache_size; // total size of all segments in the cache mi_segment_t* cache; // (small) 
cache of segments mi_stats_t* stats; // points to tld stats + mi_os_tld_t* os; // points to os stats } mi_segments_tld_t; -// OS thread local data -typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats -} mi_os_tld_t; - // Thread local data struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count diff --git a/include/mimalloc.h b/include/mimalloc.h index c03ddc1e..e6fa9c2b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,6 +275,7 @@ typedef enum mi_option_e { mi_option_reset_decommits, mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_reset_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/heap.c b/src/heap.c index 162cf406..d03925d5 100644 --- a/src/heap.c +++ b/src/heap.c @@ -149,7 +149,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + _mi_mem_collect(&heap->tld->os); } } diff --git a/src/init.c b/src/init.c index ef848de4..971a93c0 100644 --- a/src/init.c +++ b/src/init.c @@ -94,11 +94,12 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) +#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -218,6 +219,7 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/memory.c b/src/memory.c index 75a1df92..e12405c1 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,6 +53,9 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +// local +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); + // Constants #if (MI_INTPTR_SIZE==8) @@ -470,16 +473,19 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
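
The new `reset_delay` slots exist to avoid issuing an expensive OS reset (madvise / MEM_RESET) for memory that is about to be reused anyway: a reset request is merely parked in one of the `MI_RESET_DELAY_SLOTS` entries of the thread-local `mi_os_tld_t`, and only reaches the OS once `mi_option_reset_delay` milliseconds pass without the range being touched again. A minimal sketch of the intended flow using the internal API declared above (the function name and the page size here are illustrative, not taken from the patch):

    #include "mimalloc-internal.h"

    // Sketch only: a page is reset on free and reused shortly afterwards.
    static void example_reset_then_reuse(uint8_t* page_start, size_t psize, mi_os_tld_t* tld) {
      // Page becomes free: with a non-zero mi_option_reset_delay this only
      // records the range in a delay slot; no OS call is made yet.
      _mi_mem_reset(page_start, psize, tld);

      // The page is reused before the delay expires: the pending slot is
      // found and cancelled, so the reset/unreset pair costs no OS calls.
      bool is_zero = false;
      _mi_mem_unreset(page_start, psize, &is_zero, tld);

      // A free of the same range (_mi_mem_free) likewise cancels any pending
      // reset via mi_delay_remove before returning the memory, as the hunk
      // below shows.
    }
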
-void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; + + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + size_t arena_memid = 0; size_t idx = 0; size_t bitidx = 0; if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, stats); + _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region @@ -512,14 +518,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments { - _mi_os_reset(p, size, stats); + _mi_os_reset(p, size, tld->stats); // cannot use delay reset! (due to concurrent allocation in the same region) //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? } } } if (!is_eager_committed) { // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); + _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); } // TODO: should we free empty regions? currently only done _mi_mem_collect. @@ -539,7 +545,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- collection -----------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_stats_t* stats) { +void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. for (size_t i = 0; i < regions_count; i++) { mem_region_t* region = ®ions[i]; @@ -554,7 +560,8 @@ void _mi_mem_collect(mi_stats_t* stats) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release mi_atomic_write(®ion->info,0); @@ -564,25 +571,123 @@ void _mi_mem_collect(mi_stats_t* stats) { } } +/* ---------------------------------------------------------------------------- + Delay slots +-----------------------------------------------------------------------------*/ + +typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); + +static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, + mi_msecs_t delay, uint8_t* addr, size_t size, + mi_delay_resolve_fun* resolve, void* arg) +{ + if (delay==0) { + resolve(addr, size, arg); + return; + } + + mi_msecs_t now = _mi_clock_now(); + mi_delay_slot_t* oldest = slots; + // walk through all slots, resolving expired ones. + // remember the oldest slot to insert the new entry in. + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + + if (slot->expire == 0) { + // empty slot + oldest = slot; + } + // TODO: should we handle overlapping areas too? 
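    // Illustration of the cases handled below, with made-up addresses: given a
    // pending slot covering [0x10000, 0x30000):
    //  - a new request for [0x18000, 0x28000) is already covered by the slot,
    //    so only the slot's expiration is pushed out (first `else if`);
    //  - a new request for [0x08000, 0x38000) covers the whole slot, so the
    //    slot is simply overwritten with the larger range (second `else if`);
    //  - slots whose expiration has already passed are resolved on the spot,
    //    and any other slot is only remembered as the "oldest" eviction
    //    candidate, used after the loop when no existing slot absorbed the
    //    request.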
+ else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses new area, increase expiration + slot->expire = now + delay; + delay = 0; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, overwrite + slot->expire = now + delay; + slot->addr = addr; + slot->size = size; + delay = 0; + } + else if (slot->expire < now) { + // expired slot, resolve now + slot->expire = 0; + resolve(slot->addr, slot->size, arg); + } + else if (oldest->expire > slot->expire) { + oldest = slot; + } + } + if (delay>0) { + // not yet registered, use the oldest slot + if (oldest->expire > 0) { + resolve(oldest->addr, oldest->size, arg); // evict if not empty + } + oldest->expire = now + delay; + oldest->addr = addr; + oldest->size = size; + } +} + +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +{ + uint8_t* addr = (uint8_t*)p; + bool done = false; + // walk through all slots + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses the area; remove it + slot->expire = 0; + done = true; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, remove it + slot->expire = 0; + } + else if ((addr <= slot->addr && addr + size > slot->addr) || + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + // partial overlap, remove slot + mi_assert_internal(false); + slot->expire = 0; + } + } + return done; +} + +static void mi_resolve_reset(void* p, size_t size, void* vtld) { + mi_os_tld_t* tld = (mi_os_tld_t*)vtld; + _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + (uint8_t*)p, size, &mi_resolve_reset, tld); + return true; +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + return _mi_os_unreset(p, size, is_zero, tld->stats); + } + return true; +} + + /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_commit(p, size, is_zero, stats); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_commit(p, size, is_zero, tld->stats); } -bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_decommit(p, size, stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_reset(p, size, stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_unreset(p, size, is_zero, stats); +bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_decommit(p, size, tld->stats); } bool _mi_mem_protect(void* p, size_t size) { diff --git a/src/options.c b/src/options.c index 63b1612a..e098af0b 100644 --- a/src/options.c +++ b/src/options.c @@ -70,6 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) 
}, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 178e0eda..b9abe2b3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -234,7 +234,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + _mi_mem_free(segment, segment_size, segment->memid, tld->os); } @@ -281,7 +281,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->os); } segment->next = tld->cache; tld->cache = segment; @@ -346,13 +346,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); + _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); segment->mem_is_committed = true; } if (!segment->mem_is_fixed && (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); + _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); if (reset_zero) is_zero = true; } } @@ -365,7 +365,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->stats); + _mi_mem_commit(segment, info_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -459,7 +459,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment)); for (size_t i = 0; i < segment->capacity; i++) { @@ -472,14 +472,14 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) mi_assert_internal(!segment->mem_is_fixed); page->is_committed = true; bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,stats); + _mi_mem_commit(start,psize,&is_zero,tld->os); if (is_zero) page->is_zero_init = true; } if (page->is_reset) { mi_assert_internal(!segment->mem_is_fixed); 
page->is_reset = false; bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, stats); + _mi_mem_unreset(start, psize, &is_zero, tld->os); if (is_zero) page->is_zero_init = true; } } @@ -497,21 +497,20 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_mem_reset(start, psize, stats); + _mi_mem_reset(start, psize, tld->os); } // zero the page data, but not the segment fields @@ -529,7 +528,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert_expensive(mi_segment_is_valid(segment)); // mark it as free now - mi_segment_page_clear(segment, page, tld->stats); + mi_segment_page_clear(segment, page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -634,7 +633,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen _mi_stat_decrease(&tld->stats->pages_abandoned, 1); if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(segment,page,tld->stats); + mi_segment_page_clear(segment,page,tld); } else { // otherwise reclaim it @@ -666,7 +665,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen // Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld->stats); + mi_page_t* page = mi_segment_find_free(segment, tld); page->segment_in_use = true; segment->used++; mi_assert_internal(segment->used <= segment->capacity); diff --git a/src/stats.c b/src/stats.c index 011fab64..cb6d8866 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } From 288726606390edb4ffb9664b9bce0271516b550d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 Nov 2019 14:17:36 -0800 Subject: [PATCH 022/104] optimize get numa node for single node systems --- src/os.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index 44ef9830..254f85f1 100644 --- a/src/os.c +++ b/src/os.c @@ -1046,9 +1046,10 @@ int _mi_os_numa_node_count(void) { int _mi_os_numa_node(mi_os_tld_t* tld) { UNUSED(tld); - int numa_node = mi_os_numa_nodex(); - // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + // never more than the node count and >= 0 + int numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } if (numa_node < 0) numa_node = 0; return numa_node; From 00e19cad9abd225bb4c0975c4f9b6e440a81b97c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 Nov 2019 21:37:23 -0800 Subject: [PATCH 023/104] refactor region code, split out atomic bitmap --- ide/vs2019/mimalloc-override.vcxproj | 2 +- ide/vs2019/mimalloc.vcxproj | 3 +- include/mimalloc-atomic.h | 31 ++- src/bitmap.inc.c | 160 +++++++++++++ src/memory.c | 339 ++++++++++----------------- 5 files changed, 318 insertions(+), 217 deletions(-) create mode 100644 src/bitmap.inc.c diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 09fd37fb..e1c7535c 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -123,7 +123,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 1fabff5e..19696c10 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=1;%(PreprocessorDefinitions); + MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 @@ -218,6 +218,7 @@ + diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index dff0f011..c18f990f 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -36,6 +36,13 @@ static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); // Atomically add a value; returns the previous value. Memory ordering is relaxed. static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); +// Atomically "and" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); + +// Atomically "or" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); + + // Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. 
// (Note: expected and desired are in opposite order from atomic_compare_exchange) @@ -121,22 +128,28 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc #include #ifdef _WIN64 typedef LONG64 msc_intptr_t; -#define RC64(f) f##64 +#define MI_64(f) f##64 #else typedef LONG msc_intptr_t; -#define RC64(f) f +#define MI_64(f) f #endif static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); + return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +} +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); +} +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); + return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); } static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { return mi_atomic_cas_strong(p,desired,expected); } static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { - return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); + return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { return *p; @@ -177,6 +190,14 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add MI_USING_STD return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_fetch_and_explicit(p, x, memory_order_relaxed); +} +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_fetch_or_explicit(p, x, memory_order_relaxed); +} static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { MI_USING_STD return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed); diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c new file mode 100644 index 00000000..5bea4748 --- /dev/null +++ b/src/bitmap.inc.c @@ -0,0 +1,160 @@ +#pragma once +#ifndef MI_BITMAP_H +#define MI_BITMAP_H + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +// Use bit scan forward to quickly find the first zero bit if it is available +#if defined(_MSC_VER) +#define MI_HAVE_BITSCAN +#include +static inline size_t mi_bsf(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanForward)(&idx, x); + return idx; +} +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanReverse)(&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +#define MI_HAVE_BITSCAN +#if (INTPTR_MAX == LONG_MAX) +# define MI_L(x) x##l +#else +# define MI_L(x) x##ll 
+#endif +static inline size_t mi_bsf(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); +} +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); +} +#endif + + +#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) +#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set + +// An atomic bitmap of `uintptr_t` fields +typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t; +typedef mi_bitmap_field_t* mi_bitmap_t; + +// A bitmap index is the index of the bit in a bitmap. +typedef size_t mi_bitmap_index_t; + +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { + mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); + return (idx*MI_BITMAP_FIELD_BITS) + bitidx; +} + +// Get the field index from a bit index. +static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx / MI_BITMAP_FIELD_BITS); +} + +// Get the bit index in a bitmap field +static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx % MI_BITMAP_FIELD_BITS); +} + +// The bit mask for a given number of blocks at a specified bit index. +static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { + mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + return ((((uintptr_t)1 << count) - 1) << bitidx); +} + +// Try to atomically claim a sequence of `count` bits in a single field at `idx` in `bitmap`. +// Returns `true` on success. +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + volatile _Atomic(uintptr_t)* field = &bitmap[idx]; + uintptr_t map = mi_atomic_read(field); + if (map==MI_BITMAP_FIELD_FULL) return false; // short cut + + // search for 0-bit sequence of length count + const uintptr_t mask = mi_bitmap_mask_(count, 0); + const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; + +#ifdef MI_HAVE_BITSCAN + size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible +#else + size_t bitidx = 0; // otherwise start at 0 +#endif + uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + + // scan linearly for a free range of zero bits + while (bitidx <= bitidx_max) { + if ((map & m) == 0) { // are the mask bits free at bitidx? + mi_assert_internal((m >> bitidx) == mask); // no overflow? + uintptr_t newmap = map | m; + mi_assert_internal((newmap^map) >> bitidx == mask); + if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? + // no success, another thread claimed concurrently.. keep going + map = mi_atomic_read(field); + continue; + } + else { + // success, we claimed the bits! + *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + } + } + else { + // on to the next bit range +#ifdef MI_HAVE_BITSCAN + size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); + mi_assert_internal(shift > 0 && shift <= count); +#else + size_t shift = 1; +#endif + bitidx += shift; + m <<= shift; + } + } + // no bits found + return false; +} + + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. 
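
As a usage sketch of the field-level primitive above: claim a run of bits, derive the field and bit position from the returned index, and release the run again. The array name, field count, and bit count below are arbitrary example values, not names from the patch:

    #include "bitmap.inc.c"

    #define EXAMPLE_FIELDS 2
    static mi_bitmap_field_t example_bitmap[EXAMPLE_FIELDS];   // zero-initialized: all bits free

    static void example_bitmap_roundtrip(void) {
      mi_bitmap_index_t idx;
      // try to claim 4 consecutive free bits within field 0
      if (mi_bitmap_try_claim_field(example_bitmap, 0, 4, &idx)) {
        size_t field = mi_bitmap_index_field(idx);         // 0 here
        size_t bit   = mi_bitmap_index_bit_in_field(idx);  // start of the claimed run
        (void)field; (void)bit;
        // ... the 4 blocks starting at `bit` are now owned by this thread ...
        mi_bitmap_unclaim(example_bitmap, EXAMPLE_FIELDS, 4, idx);  // give them back (bits -> 0)
      }
    }
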
+static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { + for (size_t idx = 0; idx < bitmap_fields; idx++) { + if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + return false; +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +static inline void mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal((bitmap[idx] & mask) == mask); + mi_atomic_and(&bitmap[idx], ~mask); +} + + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously +static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == 0); + uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + return ((prev & mask) == 0); +} + +#endif \ No newline at end of file diff --git a/src/memory.c b/src/memory.c index 75a1df92..29e0e412 100644 --- a/src/memory.c +++ b/src/memory.c @@ -37,6 +37,8 @@ Possible issues: #include // memset +#include "bitmap.inc.c" + // Internal raw OS interface size_t _mi_os_large_page_size(); bool _mi_os_protect(void* addr, size_t size); @@ -56,22 +58,22 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 16KiB for the region map #elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map +#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // 196 bytes for the region map #else #error "define the maximum heap space allowed for regions on this platform" #endif #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8) -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS) -#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) -#define MI_REGION_MAP_FULL UINTPTR_MAX +#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB +#define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB +#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) +// Region info is a pointer to the memory region and two bits for +// its flags: is_large, and is_committed. typedef uintptr_t mi_region_info_t; static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { @@ -88,19 +90,22 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. 
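
Spelling out the constants above for a 64-bit build (a back-of-the-envelope check rather than new code; it assumes the usual 4 MiB `MI_SEGMENT_SIZE`):

    //   MI_BITMAP_FIELD_BITS     = 64 bits per field
    //   MI_REGION_SIZE           = 4 MiB * 64        = 256 MiB per region
    //   MI_REGION_MAX_ALLOC_SIZE = 256 MiB / 4       = 64 MiB
    //   MI_REGION_MAX            = 256 GiB / 256 MiB = 1024 regions
    // so `regions_map` and `regions_dirty` below are each 1024 * 8 = 8 KiB,
    // presumably the 16 KiB "region map" mentioned in the comment above.
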
typedef struct mem_region_s { - volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block - volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags - volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd + volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - size_t arena_memid; // if allocated from a (huge page) arena + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; - // The region map; 16KiB for a 256GiB HEAP_REGION_MAX -// TODO: in the future, maintain a map per NUMA node for numa aware allocation static mem_region_t regions[MI_REGION_MAX]; -static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions +// A bit mask per region for its claimed MI_SEGMENT_SIZE blocks. +static mi_bitmap_field_t regions_map[MI_REGION_MAX]; + +// A bit mask per region to track which blocks are dirty (= potentially written to) +static mi_bitmap_field_t regions_dirty[MI_REGION_MAX]; + +// Allocated regions +static volatile _Atomic(uintptr_t) regions_count; // = 0; /* ---------------------------------------------------------------------------- @@ -113,12 +118,6 @@ static size_t mi_region_block_count(size_t size) { return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; } -// The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) { - mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS); - return ((((uintptr_t)1 << blocks) - 1) << bitidx); -} - // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. static size_t mi_good_commit_size(size_t size) { if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; @@ -137,8 +136,8 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } -static size_t mi_memid_create(size_t idx, size_t bitidx) { - return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1; +static size_t mi_memid_create(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx<<1; } static size_t mi_memid_create_from_arena(size_t arena_memid) { @@ -149,78 +148,57 @@ static bool mi_memid_is_arena(size_t id) { return ((id&1)==1); } -static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) { +static bool mi_memid_indices(size_t id, mi_bitmap_index_t* bitmap_idx, size_t* arena_memid) { if (mi_memid_is_arena(id)) { *arena_memid = (id>>1); return true; } else { - *idx = ((id>>1) / MI_REGION_MAP_BITS); - *bitidx = ((id>>1) % MI_REGION_MAP_BITS); + *bitmap_idx = (mi_bitmap_index_t)(id>>1); return false; } } /* ---------------------------------------------------------------------------- -Commit from a region + Ensure a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
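
The id scheme above keeps region blocks and direct arena allocations apart with a single tag bit. A conceptual round trip (the helpers are static to memory.c, and the index values 37 and 5 are arbitrary examples chosen for illustration):

    static void example_memid_roundtrip(void) {
      mi_bitmap_index_t bitmap_idx; size_t arena_memid;

      // region allocation: bitmap index 37 -> even id (37 << 1 == 74)
      size_t id1 = mi_memid_create(37);
      bool from_arena1 = mi_memid_indices(id1, &bitmap_idx, &arena_memid);
      // from_arena1 == false, bitmap_idx == 37

      // direct arena allocation: arena memid 5 -> odd id ((5 << 1) | 1 == 11)
      size_t id2 = mi_memid_create_from_arena(5);
      bool from_arena2 = mi_memid_indices(id2, &bitmap_idx, &arena_memid);
      // from_arena2 == true, arena_memid == 5

      (void)from_arena1; (void)from_arena2;
    }
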
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, - size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_info_t* pinfo, mi_os_tld_t* tld) { - size_t mask = mi_region_block_mask(blocks,bitidx); - mi_assert_internal(mask != 0); - mi_assert_internal((mask & mi_atomic_read_relaxed(®ion->map)) == mask); - mi_assert_internal(®ions[idx] == region); - // ensure the region is reserved - mi_region_info_t info = mi_atomic_read(®ion->info); - if (info == 0) + mi_region_info_t info = mi_atomic_read(®ions[idx].info); + if (mi_unlikely(info == 0)) { bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = *allow_large; + bool region_large = allow_large; + bool is_zero = false; size_t arena_memid = 0; - void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, is_zero, &arena_memid, tld); - /* - void* start = NULL; - if (region_large) { - start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); - if (start != NULL) { region_commit = true; } - } - if (start == NULL) { - start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); - } - */ - mi_assert_internal(!(region_large && !*allow_large)); + void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); + mi_assert_internal(!(region_large && !allow_large)); if (start == NULL) { - // failure to allocate from the OS! unclaim the blocks and fail - size_t map; - do { - map = mi_atomic_read_relaxed(®ion->map); - } while (!mi_atomic_cas_weak(®ion->map, map & ~mask, map)); + // failure to allocate from the OS! fail + *pinfo = 0; return false; } // set the newly allocated region - info = mi_region_info_create(start,region_large,region_commit); - if (mi_atomic_cas_strong(®ion->info, info, 0)) { + info = mi_region_info_create(start, region_large, region_commit); + if (mi_atomic_cas_strong(®ions[idx].info, info, 0)) { // update the region count - region->arena_memid = arena_memid; - mi_atomic_write(®ion->numa_node, _mi_os_numa_node(tld) + 1); + regions[idx].arena_memid = arena_memid; + mi_atomic_write(®ions[idx].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_write(®ions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); mi_atomic_increment(®ions_count); } else { // failed, another thread allocated just before us! // we assign it to a later slot instead (up to 4 tries). - for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { + for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_write(®ions_dirty[idx], is_zero ? 
0 : ~((uintptr_t)0)); mi_atomic_increment(®ions_count); start = NULL; break; @@ -232,27 +210,33 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); } // and continue with the memory at our index - info = mi_atomic_read(®ion->info); + info = mi_atomic_read(®ions[idx].info); } } - mi_assert_internal(info == mi_atomic_read(®ion->info)); + mi_assert_internal(info == mi_atomic_read(®ions[idx].info)); mi_assert_internal(info != 0); + *pinfo = info; + return true; +} + + +/* ---------------------------------------------------------------------------- + Commit blocks +-----------------------------------------------------------------------------*/ + +static void* mi_region_commit_blocks(mi_bitmap_index_t bitmap_idx, mi_region_info_t info, size_t blocks, size_t size, bool* commit, bool* is_large, bool* is_zero, mi_os_tld_t* tld) +{ + // set dirty bits + *is_zero = mi_bitmap_claim(regions_dirty, MI_REGION_MAX, blocks, bitmap_idx); // Commit the blocks to memory bool region_is_committed = false; bool region_is_large = false; - void* start = mi_region_info_read(info,®ion_is_large,®ion_is_committed); - mi_assert_internal(!(region_is_large && !*allow_large)); + void* start = mi_region_info_read(info, ®ion_is_large, ®ion_is_committed); + mi_assert_internal(!(region_is_large && !*is_large)); mi_assert_internal(start!=NULL); - // set dirty bits - uintptr_t m; - do { - m = mi_atomic_read(®ion->dirty_mask); - } while (!mi_atomic_cas_weak(®ion->dirty_mask, m | mask, m)); - *is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range? - - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + void* blocks_start = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bitmap_idx) * MI_SEGMENT_SIZE); if (*commit && !region_is_committed) { // ensure commit bool commit_zero = false; @@ -266,99 +250,58 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // and return the allocation mi_assert_internal(blocks_start != NULL); - *allow_large = region_is_large; - *p = blocks_start; - *id = mi_memid_create(idx, bitidx); + *is_large = region_is_large; + return blocks_start; +} + +/* ---------------------------------------------------------------------------- + Claim and allocate blocks in a region +-----------------------------------------------------------------------------*/ + +static bool mi_region_alloc_blocks( + size_t idx, size_t blocks, size_t size, + bool* commit, bool* allow_large, bool* is_zero, + void** p, size_t* id, mi_os_tld_t* tld) +{ + mi_bitmap_index_t bitmap_idx; + if (!mi_bitmap_try_claim_field(regions_map, idx, blocks, &bitmap_idx)) { + return true; // no error, but also no success + } + mi_region_info_t info; + if (!mi_region_ensure_allocated(idx,*allow_large,&info,tld)) { + // failed to allocate region memory, unclaim the bits and fail + mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); + return false; + } + *p = mi_region_commit_blocks(bitmap_idx,info,blocks,size,commit,allow_large,is_zero,tld); + *id = mi_memid_create(bitmap_idx); return true; } -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanForward64(&idx, x); - #else - _BitScanForward(&idx, x); - #endif - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if 
(x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanReverse64(&idx, x); - #else - _BitScanReverse(&idx, x); - #endif - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); -} -#endif -// Allocate `blocks` in a `region` at `idx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(p != NULL && id != NULL); - mi_assert_internal(blocks < MI_REGION_MAP_BITS); +/* ---------------------------------------------------------------------------- + Try to allocate blocks in suitable regions +-----------------------------------------------------------------------------*/ - const uintptr_t mask = mi_region_block_mask(blocks, 0); - const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; - uintptr_t map = mi_atomic_read(®ion->map); - if (map==MI_REGION_MAP_FULL) return true; - - #ifdef MI_HAVE_BITSCAN - size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible - #else - size_t bitidx = 0; // otherwise start at 0 - #endif - uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx - - // scan linearly for a free range of zero bits - while(bitidx <= bitidx_max) { - if ((map & m) == 0) { // are the mask bits free at bitidx? - mi_assert_internal((m >> bitidx) == mask); // no overflow? - uintptr_t newmap = map | m; - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(®ion->map, newmap, map)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going - map = mi_atomic_read(®ion->map); - continue; - } - else { - // success, we claimed the bits - // now commit the block memory -- this can still fail - return mi_region_commit_blocks(region, idx, bitidx, blocks, - size, commit, allow_large, is_zero, p, id, tld); - } - } - else { - // on to the next bit range - #ifdef MI_HAVE_BITSCAN - size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); - mi_assert_internal(shift > 0 && shift <= blocks); - #else - size_t shift = 1; - #endif - bitidx += shift; - m <<= shift; - } +static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool allow_large ) { + uintptr_t m = mi_atomic_read_relaxed(®ions_map[idx]); + if (m == MI_BITMAP_FIELD_FULL) return false; + if (numa_node >= 0) { // use negative numa node to always succeed + int rnode = ((int)mi_atomic_read_relaxed(®ions->numa_node)) - 1; + if (rnode != numa_node) return false; + } + if (mi_unlikely(!(commit || allow_large))) { + // otherwise skip incompatible regions if possible. + // this is not guaranteed due to multiple threads allocating at the same time but + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. 
+ mi_region_info_t info = mi_atomic_read_relaxed(®ions->info); + bool is_large; + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); + bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? + if (!ok) return false; } - // no error, but also no bits found return true; } @@ -366,33 +309,15 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks, size_t size, +static bool mi_region_try_alloc_blocks( + int numa_node, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) { // check if there are available blocks in the region.. mi_assert_internal(idx < MI_REGION_MAX); - mem_region_t* region = ®ions[idx]; - uintptr_t m = mi_atomic_read_relaxed(®ion->map); - int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; - if ((rnode < 0 || rnode == numa_node) && // fits current numa node - (m != MI_REGION_MAP_FULL)) // and some bits are zero - { - bool ok = (*commit || *allow_large); // committing or allow-large is always ok - if (!ok) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info,&is_large,&is_committed); - ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? - } - if (ok) { - return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } + if (mi_region_is_suitable(numa_node, idx, *commit, *allow_large)) { + return mi_region_alloc_blocks(idx, blocks, size, commit, allow_large, is_zero, p, id, tld); } return true; // no error, but no success either } @@ -426,14 +351,14 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l size = _mi_align_up(size, _mi_os_page_size()); // calculate the number of needed blocks - size_t blocks = mi_region_block_count(size); + const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks - int numa_node = _mi_os_numa_node(tld); + const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); void* p = NULL; - size_t count = mi_atomic_read(®ions_count); - size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? + const size_t count = mi_atomic_read(®ions_count); + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
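The search that follows starts at the per-thread tld->region_idx and visits each of the regions_count regions at most once, wrapping around; the same pattern reappears for the arena search_idx later in this series. A standalone sketch of that scan, with hypothetical names:

// visit each of `count` slots exactly once, starting at `start` and wrapping around
static size_t demo_wraparound_scan(size_t start, size_t count, bool (*try_slot)(size_t)) {
  size_t idx = start;
  for (size_t visited = 0; visited < count; visited++, idx++) {
    if (idx >= count) idx = 0;       // wrap around
    if (try_slot(idx)) return idx;   // success at this slot
  }
  return SIZE_MAX;                   // every slot visited, none suitable
}

Remembering the last successful index instead of always starting at 0 trades denser reuse of low addresses for less contention between threads, which is the trade-off the comment above hints at.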
for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error @@ -456,7 +381,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l *id = mi_memid_create_from_arena(arena_memid); } else { - tld->region_idx = idx; // next start of search? currently not used as we use first-fit + tld->region_idx = idx; // next start of search } mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); @@ -475,9 +400,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { if (p==NULL) return; if (size==0) return; size_t arena_memid = 0; - size_t idx = 0; - size_t bitidx = 0; - if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { + mi_bitmap_index_t bitmap_idx; + if (mi_memid_indices(id,&bitmap_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, stats); } @@ -487,11 +411,11 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset size = _mi_align_up(size, _mi_os_page_size()); - size_t blocks = mi_region_block_count(size); - size_t mask = mi_region_block_mask(blocks, bitidx); + const size_t blocks = mi_region_block_count(size); + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? mem_region_t* region = ®ions[idx]; - mi_assert_internal((mi_atomic_read_relaxed(®ion->map) & mask) == mask ); // claimed? mi_region_info_t info = mi_atomic_read(®ion->info); bool is_large; bool is_eager_committed; @@ -499,8 +423,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(start != NULL); void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); - if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? + mi_assert_internal(bitidx + blocks <= MI_BITMAP_FIELD_BITS); + if (blocks_start != p || bitidx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? // decommit (or reset) the blocks to reduce the working set. // TODO: implement delayed decommit/reset as these calls are too expensive @@ -526,12 +450,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // this frees up virtual address space which might be useful on 32-bit systems? // and unclaim - uintptr_t map; - uintptr_t newmap; - do { - map = mi_atomic_read_relaxed(®ion->map); - newmap = map & ~mask; - } while (!mi_atomic_cas_weak(®ion->map, newmap, map)); + mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); } } @@ -542,23 +461,23 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { void _mi_mem_collect(mi_stats_t* stats) { // free every region that has no segments in use. 
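For the free path above, a worked example with hypothetical numbers, assuming a 64-bit build (MI_SEGMENT_SIZE = 4MiB, MI_BITMAP_FIELD_BITS = 64):

// freeing p of size 9MiB with id = mi_memid_create(bitmap_idx),
// where bitmap_idx = 1*MI_BITMAP_FIELD_BITS + 5 (region 1, block 5):
//   blocks       = mi_region_block_count(9MiB)             = (9 + 4 - 1)/4 = 3
//   idx          = mi_bitmap_index_field(bitmap_idx)        = 1
//   bitidx       = mi_bitmap_index_bit_in_field(bitmap_idx) = 5
//   blocks_start = region 1 start + 5*MI_SEGMENT_SIZE, which must equal p
//   mi_bitmap_unclaim(regions_map, MI_REGION_MAX, 3, bitmap_idx) clears bits 5..7
// The dirty bits in regions_dirty stay set; they are only reset when _mi_mem_collect
// (below) releases the whole region.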
for (size_t i = 0; i < regions_count; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_read_relaxed(®ion->map) == 0) { + if (mi_atomic_read_relaxed(®ions_map[i]) == 0) { // if no segments used, try to claim the whole region uintptr_t m; do { - m = mi_atomic_read_relaxed(®ion->map); - } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); + m = mi_atomic_read_relaxed(®ions_map[i]); + } while(m == 0 && !mi_atomic_cas_weak(®ions_map[i], MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); + void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); + _mi_arena_free(start, MI_REGION_SIZE, regions[i].arena_memid, stats); } // and release - mi_atomic_write(®ion->info,0); - mi_atomic_write(®ion->map,0); + mi_atomic_write(®ions[i].info,0); + mi_atomic_write(®ions_dirty[i],0); + mi_atomic_write(®ions_map[i],0); } } } From b09282bc0d6e3228c556eac833331438dbe774be Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 Nov 2019 22:49:01 -0800 Subject: [PATCH 024/104] change arena allocator to atomic bitmap as well --- include/mimalloc.h | 4 +- src/arena.c | 268 +++++++++++++-------------------------------- src/bitmap.inc.c | 6 +- src/init.c | 4 +- src/os.c | 20 ++-- 5 files changed, 94 insertions(+), 208 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index c03ddc1e..70b6e412 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,8 +230,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/arena.c b/src/arena.c index e58d2c47..b807cd47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,15 +7,19 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to allocate in one arena consisting of huge OS pages -- otherwise it delegates to direct allocation from the OS. In the future, we can expose an API to manually add more arenas which is sometimes needed for embedded devices or shared memory for example. -The arena allocation needs to be thread safe and we use a lock-free scan -with on-demand coalescing. +The arena allocation needs to be thread safe and we use an atomic +bitmap to allocate. 
The current implementation of the bitmap can +only do this within a field (`uintptr_t`) so we can allocate at most +blocks of 2GiB (64*32MiB) and no object can cross the boundary. This +can lead to fragmentation but fortunately most objects will be regions +of 256MiB in practice. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -23,6 +27,8 @@ with on-demand coalescing. #include // memset +#include "bitmap.inc.c" // atomic bitmap + // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -36,9 +42,11 @@ int _mi_os_numa_node_count(void); Arena allocation ----------------------------------------------------------- */ -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 16MiB -#define MI_MAX_ARENAS (64) +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE (8*MI_SEGMENT_ALIGN) // 32MiB +#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB +#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. @@ -48,11 +56,13 @@ typedef uintptr_t mi_block_info_t; typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + size_t field_count; // number of bitmap fields int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? bool is_large; // large OS page allocated - _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks - _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's + volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks } mi_arena_t; @@ -69,180 +79,55 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 // Use `0` as a special id for direct OS allocated memory. 
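A short worked example for these constants, assuming a 64-bit build (MI_SEGMENT_ALIGN = 4MiB, MI_BITMAP_FIELD_BITS = 64); the request sizes are hypothetical:

//   MI_ARENA_BLOCK_SIZE   = 8 * 4MiB   = 32MiB
//   MI_ARENA_MAX_OBJ_SIZE = 64 * 32MiB = 2GiB   (exactly one bitmap field)
//   a 100MiB request is rounded up to 128MiB, i.e. 4 arena blocks (4 bits in one field)
//   a 4MiB request is below MI_ARENA_MIN_OBJ_SIZE (16MiB); it is not served from an
//   arena at all and _mi_arena_alloc_aligned falls through to a plain OS allocation.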
#define MI_MEMID_OS 0 -static size_t mi_memid_create(size_t arena_index, size_t block_index) { +static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); - return ((block_index << 8) | ((arena_index+1) & 0xFF)); + return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } -static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { +static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid != MI_MEMID_OS); *arena_index = (memid & 0xFF) - 1; - *block_index = (memid >> 8); + *bitmap_index = (memid >> 8); } -/* ----------------------------------------------------------- - Block info ------------------------------------------------------------ */ -static bool mi_block_is_in_use(mi_block_info_t info) { - return ((info&1) != 0); +static size_t mi_arena_block_count_of_size(size_t size) { + const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); + const size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + return bcount; } -static size_t mi_block_count(mi_block_info_t info) { - return (info>>1); -} - -static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { - return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0)); -} - - /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ - -static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx) { - // Scan linearly through all block info's - // Skipping used ranges, coalescing free ranges on demand. - mi_assert_internal(needed_bcount > 0); - mi_assert_internal(start_idx <= arena->block_count); - mi_assert_internal(end_idx <= arena->block_count); - _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; - _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; - while (block < end) { - mi_block_info_t binfo = mi_atomic_read_relaxed(block); - size_t bcount = mi_block_count(binfo); - if (mi_block_is_in_use(binfo)) { - // in-use, skip ahead - mi_assert_internal(bcount > 0); - block += bcount; - } - else { - // free blocks - if (bcount==0) { - // optimization: - // use 0 initialized blocks at the end, to use single atomic operation - // initially to reduce contention (as we don't need to split) - if (block + needed_bcount > end) { - return NULL; // does not fit - } - else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { - // ouch, someone else was quicker. Try again.. 
- continue; - } - else { - // we got it: return a pointer to the claimed memory - ptrdiff_t idx = (block - arena->blocks); - *is_zero = arena->is_zero_init; - *block_index = idx; - return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); - } - } - - mi_assert_internal(bcount>0); - if (needed_bcount > bcount) { -#if 0 // MI_NO_ARENA_COALESCE - block += bcount; // too small, skip to the next range - continue; -#else - // too small, try to coalesce - _Atomic(mi_block_info_t)* block_next = block + bcount; - if (block_next >= end) { - return NULL; // does not fit - } - mi_block_info_t binfo_next = mi_atomic_read(block_next); - size_t bcount_next = mi_block_count(binfo_next); - if (mi_block_is_in_use(binfo_next)) { - // next block is in use, cannot coalesce - block += (bcount + bcount_next); // skip ahea over both blocks - } - else { - // next block is free, try to coalesce - // first set the next one to being used to prevent dangling ranges - if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { - // someone else got in before us.. try again - continue; - } - else { - if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance - // someone claimed/coalesced the block in the meantime - // first free the next block again.. - bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong - mi_assert(ok); UNUSED(ok); - // and try again - continue; - } - else { - // coalesced! try again - // todo: we could optimize here to immediately claim the block if the - // coalesced size is a fit instead of retrying. Keep it simple for now. - continue; - } - } - } -#endif - } - else { // needed_bcount <= bcount - mi_assert_internal(needed_bcount <= bcount); - // it fits, claim the whole block - if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { - // ouch, someone else was quicker. Try again.. - continue; - } - else { - // got it, now split off the needed part - if (needed_bcount < bcount) { - mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); - mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); - } - // return a pointer to the claimed memory - ptrdiff_t idx = (block - arena->blocks); - *is_zero = false; - *block_index = idx; - return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); - } - } + const size_t fcount = arena->field_count; + size_t idx = mi_atomic_read(&arena->search_idx); // start from last search + for (size_t visited = 0; visited < fcount; visited++, idx++) { + if (idx >= fcount) idx = 0; // wrap around + if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { + // claimed it! set the dirty bits + *is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx); + mi_atomic_write(&arena->search_idx, idx); // start search from here next time + return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE); } } - // no success return NULL; } -// Try to reduce search time by starting from bottom and wrap around. 
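In the new mi_arena_alloc above, the blocks_dirty bitmap doubles as zero-tracking: mi_bitmap_claim returns true only if every bit in the claimed range was still 0, meaning those blocks were never handed out and are still in their zero-initialized state. A sketch of that convention, as a hypothetical wrapper around the call used above:

static bool demo_mark_dirty(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t bitmap_idx) {
  // set the dirty bits for the claimed range; `true` means all of them were 0 before,
  // so the caller can report the memory as zero without touching it.
  // note: nothing in this patch clears dirty bits again when arena blocks are freed.
  return mi_bitmap_claim(arena->blocks_dirty, arena->field_count, blocks, bitmap_idx);
}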
-static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) -{ - uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); - void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); - if (p == NULL && bottom > 0) { - // try again from the start - p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); - } - if (p != NULL) { - mi_atomic_write(&arena->block_bottom, *block_index); - } - return p; -} /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, - size_t* memid) + bool* commit, bool* large, bool* is_zero, size_t* memid) { - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); + mi_bitmap_index_t bitmap_index; + void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index); if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo) >= needed_bcount); - #endif - *memid = mi_memid_create(arena_index, block_index); + *memid = mi_memid_create(arena_index, bitmap_index); *commit = true; // TODO: support commit on demand? *large = arena->is_large; } @@ -261,15 +146,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (large==NULL) large = &default_large; // ensure `large != NULL` // try to allocate in an arena if the alignment is small enough - // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. - if (alignment <= MI_SEGMENT_ALIGN && - size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 12MiB (not more than 25% waste) - !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! <16MiB - 24MiB> - ) + // and the object is not too large or too small. 
+ if (alignment <= MI_SEGMENT_ALIGN && + size <= MI_ARENA_MAX_OBJ_SIZE && + size >= MI_ARENA_MIN_OBJ_SIZE) { - size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); - size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - int numa_node = _mi_os_numa_node(tld); // current numa node + const size_t bcount = mi_arena_block_count_of_size(size); + const int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation @@ -324,8 +207,8 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { else { // allocated in an arena size_t arena_idx; - size_t block_idx; - mi_memid_indices(memid, &arena_idx, &block_idx); + size_t bitmap_idx; + mi_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); @@ -333,27 +216,17 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - mi_assert_internal(arena->block_count > block_idx); - if (arena->block_count <= block_idx) { - _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); + if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) { + _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; - mi_block_info_t binfo = mi_atomic_read_relaxed(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - if (!mi_block_is_in_use(binfo)) { + const size_t blocks = mi_arena_block_count_of_size(size); + bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); + if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); return; }; - bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); - mi_assert_internal(ok); - if (!ok) { - _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); - } - if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { - mi_atomic_write(&arena->block_bottom, block_idx); - } } } @@ -365,8 +238,7 @@ static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); - + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); if (i >= MI_MAX_ARENAS) { mi_atomic_subu(&mi_arena_count, 1); @@ -383,40 +255,49 @@ static bool mi_arena_add(mi_arena_t* arena) { #include // ENOMEM // reserve at a specific numa node -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize); + void* p = 
_mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; - size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + size_t bcount = mi_arena_block_count_of_size(hsize); + size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; + size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; - arena->start = (uint8_t*)p; - arena->block_bottom = 0; + arena->field_count = fields; + arena->start = (uint8_t*)p; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; - memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + arena->search_idx = 0; + arena->blocks_dirty = &arena->blocks_map[bcount]; + size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + if (post > 0) { + // don't use leftover bits at the end + mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); + mi_bitmap_claim(arena->blocks_map, fields, post, postidx); + } + mi_arena_add(arena); return 0; } // reserve huge pages evenly among all numa nodes. -int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node @@ -424,12 +305,13 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; + const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -446,7 +328,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages); + int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 5bea4748..aeb185d1 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -135,13 +135,15 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, } // Set `count` bits at `bitmap_idx` to 0 atomically -static inline void mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +// Returns `true` if all `count` bits were 1 previously +static inline bool 
mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); mi_assert_internal((bitmap[idx] & mask) == mask); - mi_atomic_and(&bitmap[idx], ~mask); + uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); + return ((prev & mask) == mask); } diff --git a/src/init.c b/src/init.c index ef848de4..f6d253f9 100644 --- a/src/init.c +++ b/src/init.c @@ -433,8 +433,8 @@ static void mi_process_load(void) { } if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages); + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, pages*500); } } diff --git a/src/os.c b/src/os.c index 254f85f1..027df6ab 100644 --- a/src/os.c +++ b/src/os.c @@ -940,16 +940,18 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout - mi_msecs_t elapsed = _mi_clock_end(start_t); - if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break - elapsed = max_msecs + 1; + if (max_msecs > 0) { + mi_msecs_t elapsed = _mi_clock_end(start_t); + if (page >= 1) { + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; + } + } + if (elapsed > max_msecs) { + _mi_warning_message("huge page allocation timed out\n"); + break; } - } - if (elapsed > max_msecs) { - _mi_warning_message("huge page allocation timed out\n"); - break; } } mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); From 378716c46724d839411166a0bba68b0722cf9d8b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 10:26:52 -0800 Subject: [PATCH 025/104] refactor and improve atomic bitmap usage --- CMakeLists.txt | 12 ++- ide/vs2019/mimalloc-override.vcxproj | 3 + ide/vs2019/mimalloc.vcxproj | 4 +- include/mimalloc-internal.h | 11 ++- include/mimalloc-types.h | 10 +-- src/arena.c | 62 +++++++-------- src/bitmap.inc.c | 110 ++++++++++++++++++--------- src/memory.c | 96 +++++++++++------------ src/page.c | 2 + test/test-stress.c | 4 +- 10 files changed, 183 insertions(+), 131 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12540f68..0726c601 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF) +option(MI_SECURE_FULL "Use full security mitigations (like double free protection, more expensive)" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) @@ -70,9 +71,14 @@ if(MI_OVERRIDE MATCHES "ON") endif() endif() -if(MI_SECURE MATCHES "ON") - message(STATUS "Set secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=3) +if(MI_SECURE_FULL MATCHES "ON") + message(STATUS "Set full secure build (experimental) 
(MI_SECURE_FULL=ON)") + list(APPEND mi_defines MI_SECURE=4) +else() + if(MI_SECURE MATCHES "ON") + message(STATUS "Set secure build (MI_SECURE=ON)") + list(APPEND mi_defines MI_SECURE=3) + endif() endif() if(MI_SEE_ASM MATCHES "ON") diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index e1c7535c..49f3d213 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -232,6 +232,9 @@ + + true + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 19696c10..bae49bab 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -218,7 +218,9 @@ - + + true + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 413f76e6..4d8b6a77 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -163,7 +163,6 @@ bool _mi_page_is_valid(mi_page_t* page); // Overflow detecting multiply -#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 #include // UINT_MAX, ULONG_MAX @@ -175,6 +174,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { return __builtin_umulll_overflow(count, size, total); #endif #else /* __builtin_umul_overflow is unavailable */ + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); @@ -188,6 +188,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) { // Align upwards static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { // power of two? return ((sz + mask) & ~mask); @@ -197,6 +198,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { } } +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { + mi_assert_internal(divider != 0); + return (divider == 0 ? size : ((size + divider - 1) / divider)); +} + // Is memory zero initialized? static inline bool mi_mem_is_zero(void* p, size_t size) { for (size_t i = 0; i < size; i++) { @@ -283,7 +290,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..ced8e7a9 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode @@ -93,12 +93,12 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb // Derived constants -#define MI_SEGMENT_SIZE (1<= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to -allocate in one arena consisting of huge OS pages -- otherwise it -delegates to direct allocation from the OS. +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +In contrast to the rest of mimalloc, the arenas are shared between +threads and need to be accessed using atomic operations. -In the future, we can expose an API to manually add more arenas which -is sometimes needed for embedded devices or shared memory for example. +Currently arenas are only used to for huge OS page (1GiB) reservations, +otherwise it delegates to direct allocation from the OS. +In the future, we can expose an API to manually add more kinds of arenas +which is sometimes needed for embedded devices or shared memory for example. +(We can also employ this with WASI or `sbrk` systems to reserve large arenas + on demand and be able to reuse them efficiently). The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. The current implementation of the bitmap can @@ -48,10 +52,6 @@ int _mi_os_numa_node_count(void); #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) -// Block info: bit 0 contains the `in_use` bit, the upper bits the -// size in count of arena blocks. -typedef uintptr_t mi_block_info_t; - // A memory arena descriptor typedef struct mi_arena_s { uint8_t* start; // the start of the memory area @@ -61,8 +61,8 @@ typedef struct mi_arena_s { bool is_zero_init; // is the arena zero initialized? bool is_large; // large OS page allocated volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks - mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks } mi_arena_t; @@ -81,6 +81,7 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); + mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? 
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } @@ -90,30 +91,25 @@ static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_ *bitmap_index = (memid >> 8); } - -static size_t mi_arena_block_count_of_size(size_t size) { - const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); - const size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - return bcount; +static size_t mi_block_count_of_size(size_t size) { + return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx) +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { - // claimed it! set the dirty bits - *is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx); mi_atomic_write(&arena->search_idx, idx); // start search from here next time - return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE); + return true; } } - return NULL; + return false; } @@ -125,13 +121,15 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n bool* commit, bool* large, bool* is_zero, size_t* memid) { mi_bitmap_index_t bitmap_index; - void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index); - if (p != NULL) { - *memid = mi_memid_create(arena_index, bitmap_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; + if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { + // claimed it! set the dirty bits (todo: no need for an atomic op here?) + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index); + *memid = mi_memid_create(arena_index, bitmap_index); + *commit = true; // TODO: support commit on demand? 
+ *large = arena->is_large; + return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE)); } - return p; + return NULL; } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, @@ -140,7 +138,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` @@ -151,7 +149,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size <= MI_ARENA_MAX_OBJ_SIZE && size >= MI_ARENA_MIN_OBJ_SIZE) { - const size_t bcount = mi_arena_block_count_of_size(size); + const size_t bcount = mi_block_count_of_size(size); const int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); @@ -221,7 +219,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - const size_t blocks = mi_arena_block_count_of_size(size); + const size_t blocks = mi_block_count_of_size(size); bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); @@ -268,7 +266,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - size_t bcount = mi_arena_block_count_of_size(hsize); + size_t bcount = mi_block_count_of_size(hsize); size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? @@ -284,6 +282,8 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->is_zero_init = true; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_map[bcount]; + // the bitmaps are already zero initialized due to os_alloc + // just claim leftover blocks if needed size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; if (post > 0) { // don't use leftover bits at the end diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index aeb185d1..19e6bbb8 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -1,41 +1,30 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +This file is meant to be included in other files for efficiency. +It implements a bitmap that can set/reset sequences of bits atomically +and is used to concurrently claim memory ranges. + +A bitmap is an array of fields where each field is a machine word (`uintptr_t`) + +A current limitation is that the bit sequences cannot cross fields +and that the sequence must be smaller or equal to the bits in a field. 
+---------------------------------------------------------------------------- */ #pragma once -#ifndef MI_BITMAP_H -#define MI_BITMAP_H +#ifndef MI_BITMAP_C +#define MI_BITMAP_C #include "mimalloc.h" #include "mimalloc-internal.h" -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanForward)(&idx, x); - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanReverse)(&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -#if (INTPTR_MAX == LONG_MAX) -# define MI_L(x) x##l -#else -# define MI_L(x) x##ll -#endif -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); -} -#endif - +/* ----------------------------------------------------------- + Bitmap definition +----------------------------------------------------------- */ #define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) #define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set @@ -63,14 +52,59 @@ static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) return (bitmap_idx % MI_BITMAP_FIELD_BITS); } +// Get the full bit index +static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx; +} + + // The bit mask for a given number of blocks at a specified bit index. static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); return ((((uintptr_t)1 << count) - 1) << bitidx); } -// Try to atomically claim a sequence of `count` bits in a single field at `idx` in `bitmap`. -// Returns `true` on success. + +/* ----------------------------------------------------------- + Use bit scan forward/reverse to quickly find the first zero bit if it is available +----------------------------------------------------------- */ +#if defined(_MSC_VER) +#define MI_HAVE_BITSCAN +#include +static inline size_t mi_bsf(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanForward)(&idx, x); + return idx; +} +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanReverse)(&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +#include // LONG_MAX +#define MI_HAVE_BITSCAN +#if (INTPTR_MAX == LONG_MAX) +# define MI_L(x) x##l +#else +# define MI_L(x) x##ll +#endif +static inline size_t mi_bsf(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); +} +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); +} +#endif + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. 
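To make the mask helper above concrete (the values here are only an illustration):

//   mi_bitmap_mask_(3, 5) == (((uintptr_t)1 << 3) - 1) << 5 == 0xE0   // bits 5, 6, 7
// mi_bsf(~map) gives the lowest free (zero) bit of `map`, the quick starting point
// the claim scan uses when MI_HAVE_BITSCAN is defined.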
static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); @@ -93,7 +127,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con while (bitidx <= bitidx_max) { if ((map & m) == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? - uintptr_t newmap = map | m; + const uintptr_t newmap = map | m; mi_assert_internal((newmap^map) >> bitidx == mask); if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? // no success, another thread claimed concurrently.. keep going @@ -109,10 +143,10 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con else { // on to the next bit range #ifdef MI_HAVE_BITSCAN - size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); + const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); mi_assert_internal(shift > 0 && shift <= count); #else - size_t shift = 1; + const size_t shift = 1; #endif bitidx += shift; m <<= shift; diff --git a/src/memory.c b/src/memory.c index 29e0e412..bdbf1e48 100644 --- a/src/memory.c +++ b/src/memory.c @@ -16,10 +16,10 @@ We need this memory layer between the raw OS calls because of: 1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order to reuse memory effectively. 2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses in that - object :-( (`malloc-large` tests this). This means we need a cheaper way to - reuse memory. -3. This layer can help with a NUMA aware allocation in the future. + an OS allocation/free is still (much) too expensive relative to the accesses + in that object :-( (`malloc-large` tests this). This means we need a cheaper + way to reuse memory. +3. This layer allows for NUMA aware allocation. 
Possible issues: - (2) can potentially be addressed too with a small cache per thread which is much @@ -47,8 +47,6 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -//void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); // arena.c void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); @@ -58,18 +56,18 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 16KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map #elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // 196 bytes for the region map +#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else #error "define the maximum heap space allowed for regions on this platform" #endif #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB +#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) #define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) +#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) // Region info is a pointer to the memory region and two bits for @@ -95,7 +93,7 @@ typedef struct mem_region_s { size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; -// The region map; 16KiB for a 256GiB HEAP_REGION_MAX +// The region map static mem_region_t regions[MI_REGION_MAX]; // A bit mask per region for its claimed MI_SEGMENT_SIZE blocks. @@ -173,7 +171,7 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i bool region_large = allow_large; bool is_zero = false; size_t arena_memid = 0; - void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); mi_assert_internal(!(region_large && !allow_large)); if (start == NULL) { @@ -183,35 +181,31 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i } // set the newly allocated region + // try to initialize any region up to 4 beyond the current one in + // care multiple threads are doing this concurrently (common at startup) info = mi_region_info_create(start, region_large, region_commit); - if (mi_atomic_cas_strong(®ions[idx].info, info, 0)) { - // update the region count - regions[idx].arena_memid = arena_memid; - mi_atomic_write(®ions[idx].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_write(®ions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); - mi_atomic_increment(®ions_count); - } - else { - // failed, another thread allocated just before us! - // we assign it to a later slot instead (up to 4 tries). 
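/* -----------------------------------------------------------
  Illustrative sketch (not part of the original sources): the arithmetic
  behind the region constants above, assuming a 64-bit build and a 4MiB
  MI_SEGMENT_SIZE. The five words per region counted here reflect the state
  at this point in the series: the info, numa_node and arena_memid fields of
  mem_region_t plus one in-use (regions_map) and one dirty (regions_dirty)
  bitmap field. Standalone toy; it prints the "40KiB for the region map"
  figure from the comment above.
----------------------------------------------------------- */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const size_t KiB = 1024, MiB = 1024*KiB, GiB = 1024*MiB;
  const size_t segment_size = 4*MiB;                    // one bitmap bit per 4MiB segment
  const size_t field_bits   = 8*sizeof(uintptr_t);      // 64 bits per bitmap field
  const size_t region_size  = segment_size*field_bits;  // 256MiB per region
  const size_t region_max   = (256*GiB)/region_size;    // 1024 regions cover the 256GiB heap max
  const size_t words        = 5;                        // per-region metadata words (see above)
  printf("region: %zuMiB, regions: %zu, region map: %zuKiB\n",
         region_size/MiB, region_max, region_max*words*sizeof(uintptr_t)/KiB);
  return 0;
}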
- for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { - if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { - regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_write(®ions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); - mi_atomic_increment(®ions_count); - start = NULL; - break; - } + bool claimed = false; + for (size_t i = 0; i <= 4 && idx + i < MI_REGION_MAX && !claimed; i++) { + if (!is_zero) { + // set dirty bits before CAS; this might race with a zero block but that is ok. + // (but writing before cas prevents a concurrent allocation to assume it is not dirty) + mi_atomic_write(®ions_dirty[idx+i], MI_BITMAP_FIELD_FULL); } - if (start != NULL) { - // free it if we didn't succeed to save it to some other region - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); - // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); + if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { + // claimed! + regions[idx+i].arena_memid = arena_memid; + mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_increment(®ions_count); + claimed = true; } - // and continue with the memory at our index - info = mi_atomic_read(®ions[idx].info); } + if (!claimed) { + // free our OS allocation if we didn't succeed to store it in some region + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + } + // continue with the actual info at our index in case another thread was quicker with the allocation + info = mi_atomic_read(®ions[idx].info); + mi_assert_internal(info != 0); } mi_assert_internal(info == mi_atomic_read(®ions[idx].info)); mi_assert_internal(info != 0); @@ -290,19 +284,21 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a int rnode = ((int)mi_atomic_read_relaxed(®ions->numa_node)) - 1; if (rnode != numa_node) return false; } - if (mi_unlikely(!(commit || allow_large))) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ions->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? - if (!ok) return false; - } - return true; + if (commit && allow_large) return true; // always ok + + // otherwise skip incompatible regions if possible. + // this is not guaranteed due to multiple threads allocating at the same time but + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. + mi_region_info_t info = mi_atomic_read_relaxed(®ions->info); + bool is_large; + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); + // note: we also skip if commit is false and the region is committed, + // that is a bit strong but prevents allocation of eager delayed segments in + // committed memory + bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? 
+ return ok; } // Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. diff --git a/src/page.c b/src/page.c index 32b68edb..c5b6e370 100644 --- a/src/page.c +++ b/src/page.c @@ -497,8 +497,10 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats) { UNUSED(stats); + #if (MI_SECURE <= 2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); + #endif mi_assert_internal(page->capacity + extend <= page->reserved); void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); size_t bsize = page->block_size; diff --git a/test/test-stress.c b/test/test-stress.c index bb428072..d80cb1a4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -66,7 +66,9 @@ static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) items *= 100; // 1% huge objects; if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + if (p != NULL) { + for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + } return p; } From 27f1a8b3d24acf0ff0bcbdacfbecd21437fb450e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 10:35:30 -0800 Subject: [PATCH 026/104] fix avg display; set secure default to 0` --- include/mimalloc-types.h | 2 +- src/stats.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ddbe72f3..3f5e4e27 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode diff --git a/src/stats.c b/src/stats.c index 011fab64..cb6d8866 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } From 13f5e6e43e9aae4043d9acc94fac67746fcd9bb4 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 18:09:30 -0800 Subject: [PATCH 027/104] fix numa node check in regions --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index bdbf1e48..fb3f5093 100644 --- a/src/memory.c +++ b/src/memory.c @@ -282,7 +282,7 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a if (m == MI_BITMAP_FIELD_FULL) return false; if (numa_node >= 0) { // use negative numa node to always succeed int rnode = ((int)mi_atomic_read_relaxed(®ions->numa_node)) - 1; - if (rnode != numa_node) return false; + if (rnode >= 0 && rnode != numa_node) return false; } if (commit && allow_large) return true; // always ok From 7b72a4cd50782563104e28becb7e181e8978449f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 8 Nov 2019 11:55:43 -0800 Subject: [PATCH 028/104] fix region suitable bug --- src/memory.c | 6 +++--- test/test-stress.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/memory.c b/src/memory.c index bdbf1e48..f8798d99 100644 --- a/src/memory.c +++ b/src/memory.c @@ -281,8 +281,8 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a uintptr_t m = mi_atomic_read_relaxed(®ions_map[idx]); if (m == MI_BITMAP_FIELD_FULL) return false; if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(®ions->numa_node)) - 1; - if (rnode != numa_node) return false; + int rnode = ((int)mi_atomic_read_relaxed(®ions[idx].numa_node)) - 1; + if (rnode >= 0 && rnode != numa_node) return false; } if (commit && allow_large) return true; // always ok @@ -290,7 +290,7 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a // this is not guaranteed due to multiple threads allocating at the same time but // that's ok. In secure mode, large is never allowed for any thread, so that works out; // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ions->info); + mi_region_info_t info = mi_atomic_read_relaxed(®ions[idx].info); bool is_large; bool is_committed; void* start = mi_region_info_read(info, &is_large, &is_committed); diff --git a/test/test-stress.c b/test/test-stress.c index d80cb1a4..be2a9c67 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,7 @@ terms of the MIT license. 
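/* -----------------------------------------------------------
  Illustrative sketch (not part of the original sources): the indexing slip
  fixed by the "fix region suitable bug" patch above. For an array,
  `regions->numa_node` is simply `regions[0].numa_node`, so the old check
  always consulted region 0 instead of region `idx`; the added `rnode >= 0`
  test additionally lets regions without a recorded node (stored as node+1,
  so 0 means "no association") match any request. Toy types only.
----------------------------------------------------------- */
#include <assert.h>
#include <stddef.h>

typedef struct { int numa_node; } toy_region_t;

int main(void) {
  toy_region_t regions[4] = { {1}, {2}, {3}, {4} };
  size_t idx = 2;
  assert(regions->numa_node == regions[0].numa_node);   // what the old code effectively read
  assert(regions[idx].numa_node == 3);                  // what the fixed code reads
  return 0;
}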
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 40; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From 9f08ddd0d0d2909998d71bf6da9bce2b048d851e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Nov 2019 19:30:53 -0800 Subject: [PATCH 029/104] refactor regions; add commit tracking on a segment basis --- src/arena.c | 9 +- src/bitmap.inc.c | 14 +- src/memory.c | 382 ++++++++++++++++++++--------------------------- src/segment.c | 2 +- 4 files changed, 181 insertions(+), 226 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8feec89f..1b6cf4a4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -123,7 +123,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n mi_bitmap_index_t bitmap_index; if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { // claimed it! set the dirty bits (todo: no need for an atomic op here?) - *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index); + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *memid = mi_memid_create(arena_index, bitmap_index); *commit = true; // TODO: support commit on demand? *large = arena->is_large; @@ -181,7 +181,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; + if (*large) { + *large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed + } return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); } @@ -288,7 +291,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_map, fields, post, postidx); + mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL); } mi_arena_add(arena); diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 19e6bbb8..3847e712 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -61,6 +61,7 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { // The bit mask for a given number of blocks at a specified bit index. 
static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; return ((((uintptr_t)1 << count) - 1) << bitidx); } @@ -183,14 +184,25 @@ static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, s // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously -static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == 0); uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } +// Returns `true` if all `count` bits were 1 +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == 0); + return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); +} + #endif \ No newline at end of file diff --git a/src/memory.c b/src/memory.c index f8798d99..a1f94e18 100644 --- a/src/memory.c +++ b/src/memory.c @@ -65,10 +65,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS #define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB #define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) - +#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB +#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) // Region info is a pointer to the memory region and two bits for // its flags: is_large, and is_committed. @@ -88,20 +89,16 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) - volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - size_t arena_memid; // if allocated from a (huge page) arena + volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) + volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + mi_bitmap_field_t in_use; + mi_bitmap_field_t dirty; + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; // The region map static mem_region_t regions[MI_REGION_MAX]; -// A bit mask per region for its claimed MI_SEGMENT_SIZE blocks. 
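/* -----------------------------------------------------------
  Illustrative sketch (not part of the original sources): the claim-by-OR
  idiom behind mi_bitmap_claim above, reduced to C11 atomics on one field.
  A single atomic fetch-or sets the whole range; the previous value tells the
  caller both whether every bit was zero before (the is_zero result used for
  dirty tracking) and whether any bit was still zero (the new any_zero
  out-parameter). The name `claim_bits` is hypothetical.
----------------------------------------------------------- */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool claim_bits(_Atomic(uintptr_t)* field, uintptr_t mask, bool* any_zero) {
  uintptr_t prev = atomic_fetch_or(field, mask);              // set all bits in `mask` atomically
  if (any_zero != NULL) *any_zero = ((prev & mask) != mask);  // at least one bit was still 0
  return ((prev & mask) == 0);                                // true iff every bit was 0 before
}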
-static mi_bitmap_field_t regions_map[MI_REGION_MAX]; - -// A bit mask per region to track which blocks are dirty (= potentially written to) -static mi_bitmap_field_t regions_dirty[MI_REGION_MAX]; - // Allocated regions static volatile _Atomic(uintptr_t) regions_count; // = 0; @@ -112,8 +109,7 @@ Utility functions // Blocks (of 4MiB) needed for the given size. static size_t mi_region_block_count(size_t size) { - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); - return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; + return _mi_divide_up(size, MI_SEGMENT_SIZE); } // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. @@ -134,8 +130,11 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } -static size_t mi_memid_create(mi_bitmap_index_t bitmap_idx) { - return bitmap_idx<<1; +static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { + mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); + size_t idx = region - regions; + mi_assert_internal(®ions[idx] == region); + return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; } static size_t mi_memid_create_from_arena(size_t arena_memid) { @@ -146,177 +145,149 @@ static bool mi_memid_is_arena(size_t id) { return ((id&1)==1); } -static bool mi_memid_indices(size_t id, mi_bitmap_index_t* bitmap_idx, size_t* arena_memid) { +static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { if (mi_memid_is_arena(id)) { *arena_memid = (id>>1); return true; } else { - *bitmap_idx = (mi_bitmap_index_t)(id>>1); + size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; + *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; + *region = ®ions[idx]; return false; } } /* ---------------------------------------------------------------------------- - Ensure a region is allocated from the OS (or an arena) + Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_info_t* pinfo, mi_os_tld_t* tld) +static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // ensure the region is reserved - mi_region_info_t info = mi_atomic_read(®ions[idx].info); - if (mi_unlikely(info == 0)) - { - bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = allow_large; - bool is_zero = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); - mi_assert_internal(!(region_large && !allow_large)); + // not out of regions yet? + if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - if (start == NULL) { - // failure to allocate from the OS! fail - *pinfo = 0; - return false; - } - - // set the newly allocated region - // try to initialize any region up to 4 beyond the current one in - // care multiple threads are doing this concurrently (common at startup) - info = mi_region_info_create(start, region_large, region_commit); - bool claimed = false; - for (size_t i = 0; i <= 4 && idx + i < MI_REGION_MAX && !claimed; i++) { - if (!is_zero) { - // set dirty bits before CAS; this might race with a zero block but that is ok. 
- // (but writing before cas prevents a concurrent allocation to assume it is not dirty) - mi_atomic_write(®ions_dirty[idx+i], MI_BITMAP_FIELD_FULL); - } - if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { - // claimed! - regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_increment(®ions_count); - claimed = true; - } - } - if (!claimed) { - // free our OS allocation if we didn't succeed to store it in some region - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); - } - // continue with the actual info at our index in case another thread was quicker with the allocation - info = mi_atomic_read(®ions[idx].info); - mi_assert_internal(info != 0); + // try to allocate a fresh region from the OS + bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); + bool region_large = (commit && allow_large); + bool is_zero = false; + size_t arena_memid = 0; + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); + if (start == NULL) return false; + mi_assert_internal(!(region_large && !allow_large)); + + // claim a fresh slot + const uintptr_t idx = mi_atomic_increment(®ions_count); + if (idx >= MI_REGION_MAX) { + mi_atomic_decrement(®ions_count); + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + return false; } - mi_assert_internal(info == mi_atomic_read(®ions[idx].info)); - mi_assert_internal(info != 0); - *pinfo = info; + + // allocated, initialize and claim the initial blocks + mem_region_t* r = ®ions[idx]; + r->numa_node = _mi_os_numa_node(tld) + 1; + r->arena_memid = arena_memid; + *bit_idx = 0; + mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others + *region = r; + return true; +} + +/* ---------------------------------------------------------------------------- + Try to claim blocks in suitable regions +-----------------------------------------------------------------------------*/ + +static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) { + // initialized at all? 
+ mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); + if (info==0) return false; + + // numa correct + if (numa_node >= 0) { // use negative numa node to always succeed + int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + if (rnode >= 0 && rnode != numa_node) return false; + } + + // note: we also skip if commit is false and the region is committed, + // that is a bit strong but prevents allocation of eager-delayed segments in an eagerly committed region + bool is_large; + bool is_committed; + mi_region_info_read(info, &is_large, &is_committed); + + if (!commit && is_committed) return false; + if (!allow_large && is_large) return false; return true; } -/* ---------------------------------------------------------------------------- - Commit blocks ------------------------------------------------------------------------------*/ - -static void* mi_region_commit_blocks(mi_bitmap_index_t bitmap_idx, mi_region_info_t info, size_t blocks, size_t size, bool* commit, bool* is_large, bool* is_zero, mi_os_tld_t* tld) +static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // set dirty bits - *is_zero = mi_bitmap_claim(regions_dirty, MI_REGION_MAX, blocks, bitmap_idx); + // try all regions for a free slot + const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + const size_t count = mi_atomic_read(®ions_count); + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? + for (size_t visited = 0; visited < count; visited++, idx++) { + if (idx >= count) idx = 0; // wrap around + mem_region_t* r = ®ions[idx]; + if (mi_region_is_suitable(r, numa_node, commit, allow_large)) { + if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { + tld->region_idx = idx; // remember the last found position + *region = r; + return true; + } + } + } + return false; +} - // Commit the blocks to memory + +static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); + mem_region_t* region; + mi_bitmap_index_t bit_idx; + // first try to claim in existing regions + if (!mi_region_try_claim(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { + // otherwise try to allocate a fresh region + if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { + // out of regions or memory + return NULL; + } + } + + // found a region and claimed `blocks` at `bit_idx` + mi_assert_internal(region != NULL); + mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); + + mi_region_info_t info = mi_atomic_read(®ion->info); bool region_is_committed = false; bool region_is_large = false; void* start = mi_region_info_read(info, ®ion_is_large, ®ion_is_committed); mi_assert_internal(!(region_is_large && !*is_large)); - mi_assert_internal(start!=NULL); + mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bitmap_idx) * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed) { - // ensure commit - bool commit_zero = false; - _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages) - if (commit_zero) *is_zero = true; - } - else if (!*commit && region_is_committed) { - // but even when no commit is requested, we might have committed anyway (in a huge OS page for example) - *commit = true; - } 
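/* -----------------------------------------------------------
  Illustrative sketch (not part of the original sources): the wrap-around
  scan used by mi_region_try_claim above. The search starts at the per-thread
  region_idx remembered from the last success, visits each allocated region
  at most once, and stores the index back on success so a thread tends to
  return to the same region. `slot_try_claim` is a toy stand-in for claiming
  blocks in region `idx`.
----------------------------------------------------------- */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool slot_try_claim(size_t idx) {
  return (idx % 3) == 0;                  // toy: pretend every third region has room
}

static bool search_regions(size_t count, size_t* region_idx) {
  size_t idx = *region_idx;               // start where we last succeeded
  for (size_t visited = 0; visited < count; visited++, idx++) {
    if (idx >= count) idx = 0;            // wrap around past the last region
    if (slot_try_claim(idx)) {
      *region_idx = idx;                  // remember this position for next time
      return true;
    }
  }
  return false;                           // visited every region, none had room
}

int main(void) {
  size_t last = 5;
  if (search_regions(8, &last)) printf("claimed in region %zu\n", last);  // prints 6
  return 0;
}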
- - // and return the allocation - mi_assert_internal(blocks_start != NULL); + bool any_zero = false; + *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, &any_zero); + if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed *is_large = region_is_large; - return blocks_start; + *memid = mi_memid_create(region, bit_idx); + void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); + if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed? + // ensure commit + _mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats); + } + else { + *commit = region_is_committed || !any_zero; + } + + + // and return the allocation + mi_assert_internal(p != NULL); + return p; } -/* ---------------------------------------------------------------------------- - Claim and allocate blocks in a region ------------------------------------------------------------------------------*/ - -static bool mi_region_alloc_blocks( - size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_bitmap_index_t bitmap_idx; - if (!mi_bitmap_try_claim_field(regions_map, idx, blocks, &bitmap_idx)) { - return true; // no error, but also no success - } - mi_region_info_t info; - if (!mi_region_ensure_allocated(idx,*allow_large,&info,tld)) { - // failed to allocate region memory, unclaim the bits and fail - mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); - return false; - } - *p = mi_region_commit_blocks(bitmap_idx,info,blocks,size,commit,allow_large,is_zero,tld); - *id = mi_memid_create(bitmap_idx); - return true; -} - - -/* ---------------------------------------------------------------------------- - Try to allocate blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool allow_large ) { - uintptr_t m = mi_atomic_read_relaxed(®ions_map[idx]); - if (m == MI_BITMAP_FIELD_FULL) return false; - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(®ions[idx].numa_node)) - 1; - if (rnode >= 0 && rnode != numa_node) return false; - } - if (commit && allow_large) return true; // always ok - - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ions[idx].info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - // note: we also skip if commit is false and the region is committed, - // that is a bit strong but prevents allocation of eager delayed segments in - // committed memory - bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? - return ok; -} - -// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. 
-// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks( - int numa_node, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - // check if there are available blocks in the region.. - mi_assert_internal(idx < MI_REGION_MAX); - if (mi_region_is_suitable(numa_node, idx, *commit, *allow_large)) { - return mi_region_alloc_blocks(idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } - return true; // no error, but no success either -} /* ---------------------------------------------------------------------------- Allocation @@ -324,63 +295,35 @@ static bool mi_region_try_alloc_blocks( // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. // (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, - size_t* id, mi_os_tld_t* tld) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(id != NULL && tld != NULL); + mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *id = 0; + *memid = 0; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` - - // use direct OS allocation for huge blocks or alignment - if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - size_t arena_memid = 0; - void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld); // round up size - *id = mi_memid_create_from_arena(arena_memid); - return p; - } - - // always round size to OS page size multiple (so commit/decommit go over the entire range) - // TODO: use large OS page size here? + if (size == 0) return NULL; size = _mi_align_up(size, _mi_os_page_size()); - // calculate the number of needed blocks + // allocate from regions if possible + size_t arena_memid; const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); - - // find a range of free blocks - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - void* p = NULL; - const size_t count = mi_atomic_read(®ions_count); - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - - if (p == NULL) { - // no free range in existing regions -- try to extend beyond the count.. 
but at most 8 regions - for (idx = count; idx < mi_atomic_read_relaxed(®ions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; + if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { + void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); + mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); + if (p != NULL) { + if (*commit) { ((uint8_t*)p)[0] = 0; } + return p; } + _mi_warning_message("unable to allocate from region: size %zu\n", size); } - if (p == NULL) { - // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - size_t arena_memid = 0; - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); - *id = mi_memid_create_from_arena(arena_memid); - } - else { - tld->region_idx = idx; // next start of search - } - + // and otherwise fall back to the OS + void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + *memid = mi_memid_create_from_arena(arena_memid); mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); + if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; } return p; } @@ -396,31 +339,28 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { if (p==NULL) return; if (size==0) return; size_t arena_memid = 0; - mi_bitmap_index_t bitmap_idx; - if (mi_memid_indices(id,&bitmap_idx,&arena_memid)) { + mi_bitmap_index_t bit_idx; + mem_region_t* region; + if (mi_memid_indices(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, stats); } else { // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; + mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset size = _mi_align_up(size, _mi_os_page_size()); - const size_t blocks = mi_region_block_count(size); - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? - mem_region_t* region = ®ions[idx]; + const size_t blocks = mi_region_block_count(size); mi_region_info_t info = mi_atomic_read(®ion->info); bool is_large; bool is_eager_committed; void* start = mi_region_info_read(info,&is_large,&is_eager_committed); mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bitidx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bitidx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? + mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); + if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? // decommit (or reset) the blocks to reduce the working set. 
// TODO: implement delayed decommit/reset as these calls are too expensive @@ -446,7 +386,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // this frees up virtual address space which might be useful on 32-bit systems? // and unclaim - mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); + mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); } } @@ -456,13 +396,15 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { -----------------------------------------------------------------------------*/ void _mi_mem_collect(mi_stats_t* stats) { // free every region that has no segments in use. - for (size_t i = 0; i < regions_count; i++) { - if (mi_atomic_read_relaxed(®ions_map[i]) == 0) { + uintptr_t rcount = mi_atomic_read_relaxed(®ions_count); + for (size_t i = 0; i < rcount; i++) { + mem_region_t* region = ®ions[i]; + if (mi_atomic_read_relaxed(®ion->info) != 0) { // if no segments used, try to claim the whole region uintptr_t m; do { - m = mi_atomic_read_relaxed(®ions_map[i]); - } while(m == 0 && !mi_atomic_cas_weak(®ions_map[i], MI_BITMAP_FIELD_FULL, 0 )); + m = mi_atomic_read_relaxed(®ion->in_use); + } while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region bool is_eager_committed; @@ -471,9 +413,7 @@ void _mi_mem_collect(mi_stats_t* stats) { _mi_arena_free(start, MI_REGION_SIZE, regions[i].arena_memid, stats); } // and release - mi_atomic_write(®ions[i].info,0); - mi_atomic_write(®ions_dirty[i],0); - mi_atomic_write(®ions_map[i],0); + mi_atomic_write(®ion->info,0); } } } diff --git a/src/segment.c b/src/segment.c index 178e0eda..b2b37fac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -370,7 +370,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); From d2279b2a3faf7c2e084644449326306ef8d4f619 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Nov 2019 08:13:40 -0800 Subject: [PATCH 030/104] update test-stress with better object distribution --- test/test-stress.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index be2a9c67..37572d42 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -17,8 +17,8 @@ terms of the MIT license. 
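/* -----------------------------------------------------------
  Illustrative sketch (not part of the original sources): the all-or-nothing
  claim that _mi_mem_collect uses above before freeing a region. The region
  may only be released after atomically flipping its in-use field from 0 (no
  blocks claimed) to all ones, so a concurrent allocator either sees a full
  mask and skips the region, or wins the race and keeps it alive.
----------------------------------------------------------- */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool try_claim_whole_region(_Atomic(uintptr_t)* in_use) {
  uintptr_t expected = 0;   // succeed only if no block is currently in use
  return atomic_compare_exchange_strong(in_use, &expected, ~(uintptr_t)0);
}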
#include // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 40; // scaling factor +static int THREADS = 8; // more repeatable if THREADS <= #processors +static int N = 200; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor @@ -63,7 +63,11 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1, r)) items *= 1000; // 0.01% giant + else if (chance(10, r)) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); if (p != NULL) { From 21bbb1be870c8b9bd6ca057257a4cbb0ec57e6e5 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 10 Nov 2019 12:36:55 -0800 Subject: [PATCH 031/104] fix warnings --- src/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 7e704e7a..d5ec03c2 100644 --- a/src/init.c +++ b/src/init.c @@ -100,8 +100,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, tld_main_stats, {{0,NULL,0}} }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { From 83a066fd2d0d7484abf6372e41ac777c721c761a Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 09:46:02 -0800 Subject: [PATCH 032/104] remove reset_decommits option --- include/mimalloc.h | 3 +-- src/memory.c | 28 ++++++++++++---------------- src/options.c | 7 +++---- src/os.c | 20 +++----------------- 4 files changed, 19 insertions(+), 39 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 70b6e412..4c542ee0 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -272,9 +272,8 @@ typedef enum mi_option_e { mi_option_segment_cache, mi_option_page_reset, mi_option_cache_reset, - mi_option_reset_decommits, - mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_eager_commit_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index a1f94e18..ceb9a702 100644 --- a/src/memory.c +++ b/src/memory.c @@ -350,12 +350,12 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - const size_t blocks = mi_region_block_count(size); + size = _mi_align_up(size, _mi_os_page_size()); + const size_t blocks = mi_region_block_count(size); mi_region_info_t info = mi_atomic_read(®ion->info); bool is_large; - bool is_eager_committed; - void* start = mi_region_info_read(info,&is_large,&is_eager_committed); + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); mi_assert_internal(start != NULL); void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? 
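/* -----------------------------------------------------------
  Illustrative sketch (not part of the original sources): the memid scheme
  decoded by _mi_mem_free above. The low bit tags direct arena allocations;
  region ids pack (region index, block bit index) into the remaining bits.
  Standalone toy; FIELD_BITS stands in for MI_BITMAP_FIELD_BITS on a 64-bit
  build.
----------------------------------------------------------- */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define FIELD_BITS (8*sizeof(uintptr_t))

static size_t memid_from_region(size_t region_idx, size_t bit_idx) {
  return (region_idx*FIELD_BITS + bit_idx) << 1;   // low bit 0 = region allocation
}
static size_t memid_from_arena(size_t arena_memid) {
  return (arena_memid << 1) | 1;                   // low bit 1 = arena allocation
}

int main(void) {
  size_t id = memid_from_region(7, 13);
  assert((id & 1) == 0);                           // not an arena id
  assert((id >> 1) / FIELD_BITS == 7);             // recovers the region index
  assert((id >> 1) % FIELD_BITS == 13);            // recovers the block bit index
  assert((memid_from_arena(42) >> 1) == 42);       // arena ids round-trip too
  return 0;
}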
@@ -366,18 +366,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // TODO: implement delayed decommit/reset as these calls are too expensive // if the memory is reused soon. // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large) { - if (mi_option_is_enabled(mi_option_segment_reset)) { - if (!is_eager_committed && // cannot reset large pages - (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead - mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments - { - _mi_os_reset(p, size, stats); - //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? - } - } - } - if (!is_eager_committed) { + if (!is_large && + mi_option_is_enabled(mi_option_segment_reset) && + mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead + { + _mi_os_reset(p, size, stats); + //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? + } + if (!is_committed) { // adjust commit statistics as we commit again when re-using the same slot _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); } diff --git a/src/options.c b/src/options.c index 63b1612a..75a2736a 100644 --- a/src/options.c +++ b/src/options.c @@ -65,11 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, + { 1, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output @@ -88,7 +87,7 @@ void _mi_options_init(void) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } - } + } mi_max_error_count = mi_option_get(mi_option_max_errors); } diff --git a/src/os.c b/src/os.c index 027df6ab..5229381b 100644 --- a/src/os.c +++ b/src/os.c @@ -646,10 +646,6 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); } -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative? */, is_zero, stats); -} - // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -708,22 +704,12 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. 
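/* -----------------------------------------------------------
  Illustrative sketch (not part of the original sources): a minimal POSIX
  version of the reset operation implemented below. madvise(MADV_DONTNEED)
  lets the kernel reclaim the physical pages while the range stays mapped
  and is refilled on demand, which is why reset is cheaper than a full
  decommit in the malloc-large numbers quoted above. Linux-flavored example.
----------------------------------------------------------- */
#define _DEFAULT_SOURCE       // for madvise on glibc
#include <sys/mman.h>
#include <stddef.h>

static int os_reset(void* p, size_t size) {
  // tell the kernel the contents are no longer needed; the mapping stays valid
  return madvise(p, size, MADV_DONTNEED);
}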
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(addr,size,stats); - } - else { - return mi_os_resetx(addr, size, true, stats); - } + return mi_os_resetx(addr, size, true, stats); } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) - } - else { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); - } + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); } From 93a646338343984b86b00b1c7852322eafa7190e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:16:45 -0800 Subject: [PATCH 033/104] only allow commit delay for small and medium objects --- src/options.c | 2 +- src/segment.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/options.c b/src/options.c index 75a2736a..dbb7df79 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, + { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed diff --git a/src/segment.c b/src/segment.c index b2b37fac..d089078c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -328,9 +328,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind > MI_PAGE_MEDIUM); + bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager || (page_kind >= MI_PAGE_LARGE); bool protection_still_good = false; bool is_zero = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); @@ -359,7 +359,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { From 534e1e39ef29946e502fd0f668d2dc80ffd141da Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:42:29 -0800 Subject: [PATCH 034/104] allow allocation in committed regions even if not requested --- src/memory.c | 6 ++---- src/options.c | 4 ++-- src/segment.c | 4 +++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/memory.c b/src/memory.c index ceb9a702..24239e05 100644 --- a/src/memory.c +++ b/src/memory.c @@ -210,14 +210,12 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo if (rnode >= 0 && rnode != numa_node) return false; } - // note: we also skip if commit is false and the region is committed, - // that is a bit strong but prevents allocation of eager-delayed segments in an eagerly committed region + // check allow-large bool is_large; bool is_committed; mi_region_info_read(info, &is_large, &is_committed); - - if (!commit && is_committed) return false; if (!allow_large && is_large) return false; + return true; } diff --git a/src/options.c b/src/options.c index dbb7df79..694b916b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,8 +65,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, - { 0, UNINIT, MI_OPTION(cache_reset) }, + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(cache_reset) }, // reset segment cache on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index d089078c..eb5a0390 100644 --- a/src/segment.c +++ b/src/segment.c @@ -327,12 +327,14 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_assert_internal(segment_size >= required); size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); - // Try to get it from our thread local cache first + // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind >= MI_PAGE_LARGE); bool protection_still_good = false; bool is_zero = false; + + // Try to get it from our thread local cache first mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (MI_SECURE!=0) { From 2bb058bd25258c2e7a9fb2c1a64400ec780c2912 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:44:32 -0800 Subject: [PATCH 035/104] remove cache_reset parameter --- include/mimalloc.h | 1 - src/options.c | 1 - src/segment.c | 6 +----- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4c542ee0..6df889a4 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -271,7 +271,6 @@ typedef enum mi_option_e { mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, - mi_option_cache_reset, mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_os_tag, diff --git a/src/options.c b/src/options.c index 694b916b..1231e1c9 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,6 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free - { 0, UNINIT, MI_OPTION(cache_reset) }, // reset segment cache on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index eb5a0390..ef24c660 100644 --- a/src/segment.c +++ b/src/segment.c @@ -280,9 +280,6 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) return false; } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); - } segment->next = tld->cache; tld->cache = segment; tld->cache_count++; @@ -351,8 +348,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); segment->mem_is_committed = true; } - if (!segment->mem_is_fixed && - (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { + if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_page_reset)) { bool reset_zero = false; _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); if (reset_zero) is_zero = true; From db3f1c4bfadcb7007357fd61d7dc24369ae8fe31 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 15:26:11 -0800 Subject: [PATCH 036/104] add commit info to arenas --- src/arena.c | 66 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1b6cf4a4..02890bd6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -33,6 +33,7 @@ of 256MiB in 
practice. #include "bitmap.inc.c" // atomic bitmap + // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -40,6 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- @@ -56,13 +58,15 @@ int _mi_os_numa_node_count(void); typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) - size_t field_count; // number of bitmap fields + size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? + bool is_committed; // is the memory committed bool is_large; // large OS page allocated volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks + mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? + mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -104,7 +108,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around - if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { + if (mi_bitmap_try_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; } @@ -118,31 +122,46 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid) + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; - if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { - // claimed it! set the dirty bits (todo: no need for an atomic op here?) - *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *memid = mi_memid_create(arena_index, bitmap_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; - return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE)); + if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; + + // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
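/* -----------------------------------------------------------
  Illustrative sketch (not part of the original sources): the trailing-bitmap
  layout used by mi_arena_t above. The descriptor is allocated with extra
  space so that the in-use bitmap lives in place and the dirty (and, with
  commit-on-demand, the committed) bitmap follows right after it. Simplified
  hypothetical types; the real descriptor also carries the start pointer,
  block count, numa node and flags.
----------------------------------------------------------- */
#include <stdint.h>
#include <stdlib.h>

typedef uintptr_t bitmap_field_t;

typedef struct toy_arena_s {
  size_t          field_count;
  bitmap_field_t* blocks_dirty;     // points into the same allocation, after blocks_inuse
  bitmap_field_t  blocks_inuse[1];  // really `field_count` fields, in place
} toy_arena_t;

static toy_arena_t* toy_arena_create(size_t field_count) {  // assumes field_count >= 1
  size_t asize = sizeof(toy_arena_t) + (2*field_count - 1)*sizeof(bitmap_field_t);
  toy_arena_t* arena = (toy_arena_t*)calloc(1, asize);      // zero init, like the arena bitmaps
  if (arena == NULL) return NULL;
  arena->field_count  = field_count;
  arena->blocks_dirty = &arena->blocks_inuse[field_count];  // dirty bitmap directly after in-use
  return arena;
}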
+ void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); + *memid = mi_memid_create(arena_index, bitmap_index); + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + *large = arena->is_large; + if (arena->is_committed) { + // always committed + *commit = true; } - return NULL; + else if (commit) { + // ensure commit now + bool any_zero; + mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_zero); + if (any_zero) { + bool commit_zero; + _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); + if (commit_zero) *is_zero = true; + } + } + else { + // no need to commit, but check if already fully committed + *commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + } + return p; } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. if (alignment <= MI_SEGMENT_ALIGN && @@ -160,7 +179,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -172,7 +191,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -182,9 +201,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - if (*large) { - *large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed - } return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); } @@ -223,7 +239,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { return; } const size_t blocks = mi_block_count_of_size(size); - bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); + bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); return; @@ -283,15 +299,17 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; + arena->is_committed = true; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_map[bcount]; + arena->blocks_dirty = &arena->blocks_inuse[bcount]; + arena->blocks_committed = NULL; // the bitmaps are already zero initialized due to os_alloc // just claim leftover blocks if needed size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL); + mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } mi_arena_add(arena); From 5e6754f3f7905485ca74546ab082f4c3bc5404fd Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 15:45:31 -0800 Subject: [PATCH 037/104] track commit status per block in a region --- src/memory.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/memory.c b/src/memory.c index 208b9b7e..8299bbc2 100644 --- a/src/memory.c +++ b/src/memory.c @@ -59,7 +59,7 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_ // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map #elif (MI_INTPTR_SIZE==4) #define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else @@ -94,8 +94,9 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b typedef struct mem_region_s { volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - mi_bitmap_field_t in_use; - mi_bitmap_field_t dirty; + mi_bitmap_field_t in_use; // bit per in-use block + mi_bitmap_field_t dirty; // track if non-zero per block + mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; @@ -165,20 +166,20 @@ static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // not out of regions yet? 
if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; // try to allocate a fresh region from the OS bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; + bool region_large = (commit && allow_large); + bool is_zero = false; size_t arena_memid = 0; void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); - + // claim a fresh slot const uintptr_t idx = mi_atomic_increment(®ions_count); if (idx >= MI_REGION_MAX) { @@ -191,8 +192,13 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t* r = ®ions[idx]; r->numa_node = _mi_os_numa_node(tld) + 1; r->arena_memid = arena_memid; + mi_atomic_write(&r->in_use, 0); + mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); + mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + + // and share it mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others *region = r; return true; @@ -269,20 +275,28 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(!(region_is_large && !*is_large)); mi_assert_internal(start != NULL); - bool any_zero = false; - *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, &any_zero); - if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed + *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); *is_large = region_is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed? - // ensure commit - _mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats); + if (region_is_committed) { + // always committed + *commit = true; + } + else if (*commit) { + // ensure commit + bool any_zero; + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_zero); + if (any_zero) { + bool commit_zero; + _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); + if (commit_zero) *is_zero = true; + } } else { - *commit = region_is_committed || !any_zero; - } - + // no need to commit, but check if already fully committed + *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); + } // and return the allocation mi_assert_internal(p != NULL); @@ -374,7 +388,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { mi_option_is_enabled(mi_option_segment_reset) && mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead { - _mi_os_reset(p, size, tld->stats); + // note: don't use `_mi_mem_reset` as it is shared with other threads! 
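// Aside: the region `info` word that mi_region_try_alloc_os publishes via
// `mi_region_info_create` (and that is decoded with `mi_region_info_read`)
// presumably packs the segment-aligned start pointer together with the
// `is_large` and `is_committed` flags in its low bits. A sketch of that kind
// of pointer/flag packing, with hypothetical names, assuming the start address
// is at least 4-byte aligned:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static uintptr_t sketch_info_create(void* start, bool is_large, bool is_committed) {
  assert(((uintptr_t)start & 0x3) == 0);       // alignment leaves two spare bits
  return ((uintptr_t)start | (is_large ? 0x2 : 0) | (is_committed ? 0x1 : 0));
}

static void* sketch_info_read(uintptr_t info, bool* is_large, bool* is_committed) {
  if (is_large != NULL)     *is_large     = ((info & 0x2) != 0);
  if (is_committed != NULL) *is_committed = ((info & 0x1) != 0);
  return (void*)(info & ~(uintptr_t)0x3);      // strip the flag bits to recover the pointer
}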
+ _mi_os_reset(p, size, tld->stats); // TODO: maintain reset bits to unreset } if (!is_committed) { // adjust commit statistics as we commit again when re-using the same slot From a0958b2da696a308f8c200f45f08bf1ab3e5f14b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 17:06:16 -0800 Subject: [PATCH 038/104] enable more reset delay slots --- include/mimalloc-types.h | 13 ++++++-- src/init.c | 9 ++++-- src/memory.c | 70 ++++++++++++++++++++++++++-------------- src/options.c | 2 +- src/segment.c | 4 ++- 5 files changed, 66 insertions(+), 32 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 2651fc85..0ce91339 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -390,13 +390,20 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ typedef int64_t mi_msecs_t; +#define MI_RESET_DELAY_SLOTS (256) + typedef struct mi_delay_slot_s { mi_msecs_t expire; uint8_t* addr; size_t size; } mi_delay_slot_t; -#define MI_RESET_DELAY_SLOTS (128) +typedef struct mi_delay_slots_s { + size_t capacity; // always `MI_RESET_DELAY_SLOTS` + size_t count; // current slots used (`<= capacity`) + mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS]; +} mi_delay_slots_t; + // ------------------------------------------------------ // Thread Local data @@ -411,8 +418,8 @@ typedef struct mi_segment_queue_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats - mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; + mi_delay_slots_t* reset_delay; // delay slots for OS reset operations + mi_stats_t* stats; // points to tld stats } mi_os_tld_t; // Segments thread local data diff --git a/src/init.c b/src/init.c index d5ec03c2..c9700cd5 100644 --- a/src/init.c +++ b/src/init.c @@ -100,8 +100,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, tld_main_stats, {{0,NULL,0}} }, // os - { MI_STATS_NULL } // stats + { 0, NULL, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { @@ -192,6 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_delay_slots_t reset_delay; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -211,6 +212,7 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; + mi_delay_slots_t* reset_delay = &td->reset_delay; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); heap->random = _mi_random_init(heap->thread_id); @@ -221,6 +223,9 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = &tld->stats; + tld->os.reset_delay = reset_delay; + memset(reset_delay, 0, sizeof(*reset_delay)); + reset_delay->capacity = MI_RESET_DELAY_SLOTS; _mi_heap_default = heap; } return false; diff --git a/src/memory.c b/src/memory.c index 8299bbc2..f3052d6b 100644 --- a/src/memory.c +++ b/src/memory.c @@ -54,7 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); // local -static bool 
mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); +static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); // Constants @@ -208,7 +208,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, Try to claim blocks in suitable regions -----------------------------------------------------------------------------*/ -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) { +static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); if (info==0) return false; @@ -229,7 +229,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo } -static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // try all regions for a free slot const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); @@ -238,7 +238,7 @@ static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, me for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; - if (mi_region_is_suitable(r, numa_node, commit, allow_large)) { + if (mi_region_is_suitable(r, numa_node, allow_large)) { if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; @@ -256,7 +256,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mem_region_t* region; mi_bitmap_index_t bit_idx; // first try to claim in existing regions - if (!mi_region_try_claim(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { + if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory @@ -354,7 +354,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay, p, size); size_t arena_memid = 0; mi_bitmap_index_t bit_idx; @@ -424,7 +424,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -440,21 +440,22 @@ void _mi_mem_collect(mi_os_tld_t* tld) { typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); -static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, +static void mi_delay_insert(mi_delay_slots_t* ds, mi_msecs_t delay, uint8_t* addr, size_t size, mi_delay_resolve_fun* resolve, void* arg) { - if (delay==0) { + if (ds == NULL || delay==0 || addr==NULL || size==0) { resolve(addr, size, arg); return; } mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = slots; + mi_delay_slot_t* oldest = &ds->slots[0]; // 
walk through all slots, resolving expired ones. // remember the oldest slot to insert the new entry in. - for (size_t i = 0; i < count; i++) { - mi_delay_slot_t* slot = &slots[i]; + size_t newcount = 0; + for (size_t i = 0; i < ds->count; i++) { + mi_delay_slot_t* slot = &ds->slots[i]; if (slot->expire == 0) { // empty slot @@ -480,26 +481,40 @@ static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, } else if (oldest->expire > slot->expire) { oldest = slot; + newcount = i+1; + } + else { + newcount = i+1; } } + ds->count = newcount; if (delay>0) { - // not yet registered, use the oldest slot - if (oldest->expire > 0) { + // not yet registered, use the oldest slot (or a new one if there is space) + if (ds->count < ds->capacity) { + oldest = &ds->slots[ds->count]; + ds->count++; + } + else if (oldest->expire > 0) { resolve(oldest->addr, oldest->size, arg); // evict if not empty } + mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); oldest->expire = now + delay; oldest->addr = addr; oldest->size = size; } } -static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) { + if (ds == NULL || p==NULL || size==0) return false; + uint8_t* addr = (uint8_t*)p; bool done = false; - // walk through all slots - for (size_t i = 0; i < count; i++) { - mi_delay_slot_t* slot = &slots[i]; + size_t newcount = 0; + + // walk through all valid slots + for (size_t i = 0; i < ds->count; i++) { + mi_delay_slot_t* slot = &ds->slots[i]; if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { // earlier slot encompasses the area; remove it slot->expire = 0; @@ -510,12 +525,17 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_ slot->expire = 0; } else if ((addr <= slot->addr && addr + size > slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap, remove slot - mi_assert_internal(false); + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + // partial overlap + // can happen with a large object spanning onto some partial end block + // mi_assert_internal(false); slot->expire = 0; } + else { + newcount = i + 1; + } } + ds->count = newcount; return done; } @@ -525,13 +545,13 @@ static void mi_resolve_reset(void* p, size_t size, void* vtld) { } bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), (uint8_t*)p, size, &mi_resolve_reset, tld); return true; } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { return _mi_os_unreset(p, size, is_zero, tld->stats); } return true; @@ -544,12 +564,12 @@ bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { -----------------------------------------------------------------------------*/ bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + 
mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 81ffe88b..ff96c95b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index 49dab6ba..549dd339 100644 --- a/src/segment.c +++ b/src/segment.c @@ -504,7 +504,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? - if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) + // && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets + { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; From 165ee4584597aebdb1a45fcd4e8b3904b6f7d396 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 17:31:48 -0800 Subject: [PATCH 039/104] initialize delay slots for the main thread --- src/init.c | 4 +++- src/options.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index c9700cd5..5967b4b9 100644 --- a/src/init.c +++ b/src/init.c @@ -96,11 +96,13 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) +static mi_delay_slots_t tld_reset_delay_main = { MI_RESET_DELAY_SLOTS, 0, { {0,NULL,0} } }; + static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, NULL, tld_main_stats }, // os + { 0, &tld_reset_delay_main, tld_main_stats }, // os { MI_STATS_NULL } // stats }; diff --git a/src/options.c b/src/options.c index ff96c95b..81ffe88b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds From ef179a63770d8e17f105303a08ddfdd57085b936 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 
Nov 2019 10:16:59 -0800 Subject: [PATCH 040/104] avoid allocation at numa node detection on linux --- include/mimalloc-internal.h | 37 +++++++++++++++------ src/os.c | 65 +++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 46 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6bfabe27..668a7bd3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -17,18 +17,18 @@ terms of the MIT license. A copy of the license can be found in the file #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else -#define mi_trace_message(...) +#define mi_trace_message(...) #endif #if defined(_MSC_VER) #define mi_decl_noinline __declspec(noinline) -#define mi_attr_noreturn +#define mi_attr_noreturn #elif defined(__GNUC__) || defined(__clang__) #define mi_decl_noinline __attribute__((noinline)) #define mi_attr_noreturn __attribute__((noreturn)) #else #define mi_decl_noinline -#define mi_attr_noreturn +#define mi_attr_noreturn #endif @@ -56,8 +56,6 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); -int _mi_os_numa_node(mi_os_tld_t* tld); -int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); @@ -146,8 +144,8 @@ bool _mi_page_is_valid(mi_page_t* page); Inlined definitions ----------------------------------------------------------- */ #define UNUSED(x) (void)(x) -#if (MI_DEBUG>0) -#define UNUSED_RELEASE(x) +#if (MI_DEBUG>0) +#define UNUSED_RELEASE(x) #else #define UNUSED_RELEASE(x) UNUSED(x) #endif @@ -398,7 +396,7 @@ static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* bl #endif } -static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { +static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST block->next = (mi_encoded_t)next ^ cookie; #else @@ -411,12 +409,12 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* #ifdef MI_ENCODE_FREELIST mi_block_t* next = mi_block_nextx(page->cookie,block); // check for free list corruption: is `next` at least in our segment range? - // TODO: it is better to check if it is actually inside our page but that is more expensive + // TODO: it is better to check if it is actually inside our page but that is more expensive // to calculate. Perhaps with a relative free list this becomes feasible? 
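// Aside: the encoded free list stores `next ^ cookie` instead of the raw
// pointer, so decoding is simply the same XOR again, and a corrupted or forged
// entry will usually decode to an address outside the segment, which is what
// the check below catches. A self-contained round-trip sketch with simplified
// types (the real code uses mi_encoded_t and the per-page cookie):

#include <stdint.h>

typedef struct sketch_block_s { uintptr_t next; } sketch_block_t;

static void sketch_set_next(uintptr_t cookie, sketch_block_t* block, const sketch_block_t* next) {
  block->next = ((uintptr_t)next ^ cookie);               // encode on store
}

static sketch_block_t* sketch_get_next(uintptr_t cookie, const sketch_block_t* block) {
  return (sketch_block_t*)(block->next ^ cookie);         // decode on load
}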
if (next!=NULL && !mi_is_in_same_segment(block, next)) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; - } + } return next; #else UNUSED(page); @@ -433,6 +431,25 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } + +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(mi_os_tld_t* tld); +int _mi_os_numa_node_count_get(void); + +extern int _mi_numa_node_count; +static inline int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_likely(_mi_numa_node_count == 1)) return 0; + else return _mi_os_numa_node_get(tld); +} +static inline int _mi_os_numa_node_count(void) { + if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; + else return _mi_os_numa_node_count_get(); +} + + // ------------------------------------------------------------------- // Getting the thread id should be performant // as it is called in the fast path of `_mi_free`, diff --git a/src/os.c b/src/os.c index 5229381b..d6878927 100644 --- a/src/os.c +++ b/src/os.c @@ -786,9 +786,9 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; + MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { @@ -818,7 +818,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type = MemExtendedParameterNumaNode; - params[0].ULong = (unsigned)numa_node; + params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } #endif @@ -838,7 +838,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we + // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); @@ -857,7 +857,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #endif -#if (MI_INTPTR_SIZE >= 8) +#if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 @@ -900,7 +900,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems - + // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. 
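// Aside: the strategy described in the comment above, reserving the 1GiB pages
// one at a time so the loop can stop early on failure or on a timeout while
// keeping the pages obtained so far contiguous, boils down to the control flow
// below. The helpers are stand-ins (not mimalloc functions); only the loop
// shape is the point:

#include <stddef.h>
#include <stdint.h>

typedef int64_t sketch_msecs_t;

static void* sketch_alloc_one_gib_page_at(void* addr) { (void)addr; return NULL; }  // stand-in for the OS call
static sketch_msecs_t sketch_clock_now(void) { return 0; }                          // stand-in for _mi_clock_now

static size_t sketch_reserve_huge_pages(uint8_t* start, size_t pages, sketch_msecs_t max_msecs) {
  const sketch_msecs_t start_t = sketch_clock_now();
  size_t page;
  for (page = 0; page < pages; page++) {
    void* addr = start + page * ((size_t)1 << 30);            // next 1GiB slot
    if (sketch_alloc_one_gib_page_at(addr) != addr) break;    // stop if the OS gave nothing (or not there)
    if (max_msecs > 0 && (sketch_clock_now() - start_t) > max_msecs) { page++; break; }  // timeout
  }
  return page;  // number of pages actually reserved
}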
@@ -920,11 +920,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } break; } - + // success, record it _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - + // check for timeout if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); @@ -958,7 +958,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { } /* ---------------------------------------------------------------------------- -Support NUMA aware allocation +Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 static int mi_os_numa_nodex() { @@ -975,9 +975,8 @@ static int mi_os_numa_node_countx(void) { return (int)(numa_max + 1); } #elif defined(__linux__) -#include -#include -#include +#include // getcpu +#include // access static int mi_os_numa_nodex(void) { #ifdef SYS_getcpu @@ -990,22 +989,15 @@ static int mi_os_numa_nodex(void) { return 0; #endif } - static int mi_os_numa_node_countx(void) { - DIR* d = opendir("/sys/devices/system/node"); - if (d==NULL) return 1; - - struct dirent* de; - int max_node_num = 0; - while ((de = readdir(d)) != NULL) { - int node_num; - if (strncmp(de->d_name, "node", 4) == 0) { - node_num = (int)strtol(de->d_name+4, NULL, 0); - if (max_node_num < node_num) max_node_num = node_num; - } + char buf[128]; + int max_node = mi_option_get(mi_option_max_numa_node); + int node = 0; + for(node = 0; node < max_node; node++) { + snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + if (access(buf,R_OK) != 0) break; } - closedir(d); - return (max_node_num + 1); + return (node+1); } #else static int mi_os_numa_nodex(void) { @@ -1016,29 +1008,30 @@ static int mi_os_numa_node_countx(void) { } #endif -int _mi_os_numa_node_count(void) { - static int numa_node_count = 0; // cache the node count - if (mi_unlikely(numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); +int _mi_numa_node_count = 0; // cache the node count + +int _mi_os_numa_node_count_get(void) { + if (mi_unlikely(_mi_numa_node_count <= 0)) { + int ncount = mi_os_numa_node_countx(); int ncount0 = ncount; // never more than max numa node and at least 1 int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; - numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0); + _mi_numa_node_count = ncount; + _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); } - mi_assert_internal(numa_node_count >= 1); - return numa_node_count; + mi_assert_internal(_mi_numa_node_count >= 1); + return _mi_numa_node_count; } -int _mi_os_numa_node(mi_os_tld_t* tld) { +int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 int numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; + if (numa_node < 0) numa_node = 0; return numa_node; } From af746ca4c1682e29dd42e8c0e6fa6db6aa04b200 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:17:39 -0800 Subject: [PATCH 041/104] inline bitmap_mask --- src/bitmap.inc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/bitmap.inc.c 
b/src/bitmap.inc.c index 3847e712..81f87a79 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -8,11 +8,11 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- This file is meant to be included in other files for efficiency. It implements a bitmap that can set/reset sequences of bits atomically -and is used to concurrently claim memory ranges. +and is used to concurrently claim memory ranges. A bitmap is an array of fields where each field is a machine word (`uintptr_t`) -A current limitation is that the bit sequences cannot cross fields +A current limitation is that the bit sequences cannot cross fields and that the sequence must be smaller or equal to the bits in a field. ---------------------------------------------------------------------------- */ #pragma once @@ -59,7 +59,7 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { // The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { +static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; return ((((uintptr_t)1 << count) - 1) << bitidx); @@ -104,10 +104,10 @@ static inline size_t mi_bsr(uintptr_t x) { Claim a bit sequence atomically ----------------------------------------------------------- */ -// Try to atomically claim a sequence of `count` bits in a single +// Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) -{ +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ mi_assert_internal(bitmap_idx != NULL); volatile _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_read(field); @@ -136,7 +136,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con continue; } else { - // success, we claimed the bits! + // success, we claimed the bits! 
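// Worked example for `mi_bitmap_mask_` above: claiming count=4 blocks starting
// at bitidx=8 yields ((1<<4)-1)<<8 = 0xF00, i.e. exactly bits 8..11. The
// special case for a full field exists because shifting a word by its full
// width is undefined in C. A tiny self-checking restatement (hypothetical
// macro, mirroring the formula only for count < MI_BITMAP_FIELD_BITS):

#include <assert.h>
#include <stdint.h>

#define SKETCH_MASK(count,bitidx)  ((((uintptr_t)1 << (count)) - 1) << (bitidx))

static void sketch_mask_examples(void) {
  assert(SKETCH_MASK(4, 8) == (uintptr_t)0xF00);   // four blocks at bit 8: bits 8..11
  assert(SKETCH_MASK(1, 0) == (uintptr_t)0x1);     // a single block at the start of the field
  assert(SKETCH_MASK(3, 5) == (uintptr_t)0xE0);    // three blocks at bit 5: bits 5..7
}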
*bitmap_idx = mi_bitmap_index_create(idx, bitidx); return true; } @@ -205,4 +205,4 @@ static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); } -#endif \ No newline at end of file +#endif From 867d78f877474c7f36fd19bc2ea62918f117f068 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:19:52 -0800 Subject: [PATCH 042/104] reserve huge OS pages earlier on at process_init --- src/init.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/init.c b/src/init.c index 5967b4b9..473e9a32 100644 --- a/src/init.c +++ b/src/init.c @@ -19,7 +19,7 @@ const mi_page_t _mi_page_empty = { 0, #endif 0, // used - NULL, + NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) @@ -246,7 +246,7 @@ static bool _mi_heap_done(void) { // switch to backing heap and free it heap = heap->tld->heap_backing; if (!mi_heap_is_initialized(heap)) return false; - + // collect if not the main thread if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); @@ -394,7 +394,7 @@ bool mi_is_redirected() mi_attr_noexcept { } // Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) +#if defined(_WIN32) && defined(MI_SHARED_LIB) #ifdef __cplusplus extern "C" { #endif @@ -440,11 +440,6 @@ static void mi_process_load(void) { if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,msg); } - - if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); - } } // Initialize the process; called by thread_init or the process loader @@ -471,6 +466,11 @@ void mi_process_init(void) mi_attr_noexcept { #endif mi_thread_init(); mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + + if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, pages*500); + } } // Called when the process is done (through `at_exit`) @@ -497,7 +497,7 @@ static void mi_process_done(void) { #if defined(_WIN32) && defined(MI_SHARED_LIB) - // Windows DLL: easy to hook into process_init and thread_done + // Windows DLL: easy to hook into process_init and thread_done __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { UNUSED(reserved); UNUSED(inst); From d4f54dcf3049bd958ee262cbd9b3b0c7134d59ed Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:37:15 -0800 Subject: [PATCH 043/104] remove numaif dependency on linux --- CMakeLists.txt | 11 ----------- src/os.c | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 18bdea5a..a2258128 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,6 @@ option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanis option(MI_BUILD_TESTS "Build test executables" ON) include("cmake/mimalloc-config-version.cmake") -include("CheckIncludeFile") set(mi_install_dir "lib/mimalloc-${mi_version}") @@ -98,16 +97,6 @@ if(MI_USE_CXX MATCHES "ON") set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) endif() -CHECK_INCLUDE_FILE("numaif.h" 
MI_HAVE_NUMA_H) -if(MI_HAVE_NUMA_H) - list(APPEND mi_defines MI_HAS_NUMA) - list(APPEND mi_libraries numa) -else() - if (NOT(WIN32)) - message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)") - endif() -endif() - # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) diff --git a/src/os.c b/src/os.c index d6878927..7af7363b 100644 --- a/src/os.c +++ b/src/os.c @@ -827,28 +827,35 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) -#ifdef MI_HAS_NUMA -#include // mbind, and use -lnuma +#include +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif +#if defined(SYS_mbind) +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags); + return 0; +} #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
// see: - long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } } - #else - UNUSED(numa_node); - #endif return p; } #else From bdb82748191ac5dbc436f0f62dcbebfd3df95157 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 12 Nov 2019 12:04:43 -0800 Subject: [PATCH 044/104] change max_numa_node to max_numa_nodes option --- include/mimalloc.h | 2 +- src/options.c | 2 +- src/os.c | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 67b17c73..8d029135 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,7 +275,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_reset_delay, mi_option_os_tag, - mi_option_max_numa_node, + mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 81ffe88b..bbea4e67 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node + { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; diff --git a/src/os.c b/src/os.c index 7af7363b..93fb8b31 100644 --- a/src/os.c +++ b/src/os.c @@ -998,9 +998,10 @@ static int mi_os_numa_nodex(void) { } static int mi_os_numa_node_countx(void) { char buf[128]; - int max_node = mi_option_get(mi_option_max_numa_node); + int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) int node = 0; - for(node = 0; node < max_node; node++) { + for(node = 0; node < max_nodes; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); if (access(buf,R_OK) != 0) break; } @@ -1022,7 +1023,7 @@ int _mi_os_numa_node_count_get(void) { int ncount = mi_os_numa_node_countx(); int ncount0 = ncount; // never more than max numa node and at least 1 - int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); + int nmax = (int)mi_option_get(mi_option_max_numa_nodes); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; _mi_numa_node_count = ncount; From d01ed42bcb755ed6c1b52bfd8a306821da098dd5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 13 Nov 2019 13:35:50 -0800 Subject: [PATCH 045/104] replace max_numa_nodes by use_numa_nodes (to help with wrong detection of numa nodes on WSL for example) --- include/mimalloc-internal.h | 8 +++--- include/mimalloc.h | 4 +-- src/arena.c | 15 +++++------ src/init.c | 2 +- src/options.c | 4 +-- src/os.c | 54 +++++++++++++++++-------------------- 6 files changed, 40 insertions(+), 47 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 668a7bd3..77045a99 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -436,15 +436,15 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // Optimize numa node access for the common case (= one node) // ------------------------------------------------------------------- -int _mi_os_numa_node_get(mi_os_tld_t* tld); -int _mi_os_numa_node_count_get(void); +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); -extern int _mi_numa_node_count; +extern size_t _mi_numa_node_count; static inline int _mi_os_numa_node(mi_os_tld_t* tld) { if (mi_likely(_mi_numa_node_count == 1)) return 0; else return _mi_os_numa_node_get(tld); } -static inline int _mi_os_numa_node_count(void) { +static inline size_t _mi_os_numa_node_count(void) { if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; else return _mi_os_numa_node_count_get(); } diff --git a/include/mimalloc.h b/include/mimalloc.h index 8d029135..3c942849 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,7 +230,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated @@ -274,8 +274,8 @@ typedef enum mi_option_e { mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_reset_delay, + mi_option_use_numa_nodes, mi_option_os_tag, - mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/arena.c b/src/arena.c index 02890bd6..46741208 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,7 +42,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_sec void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- Arena allocation @@ -317,22 +316,22 @@ int mi_reserve_huge_os_pages_at(size_t pages, int 
numa_node, size_t timeout_msec } -// reserve huge pages evenly among all numa nodes. -int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { +// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes - for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -349,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/init.c b/src/init.c index 473e9a32..72543b95 100644 --- a/src/init.c +++ b/src/init.c @@ -469,7 +469,7 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); } } diff --git a/src/options.c b/src/options.c index bbea4e67..180f6a75 100644 --- a/src/options.c +++ b/src/options.c @@ -69,9 +69,9 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
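// Worked example for the interleaving in `mi_reserve_huge_os_pages_interleave`
// above: with pages=10 and numa_count=4, pages_per=2 and pages_mod=2, so nodes
// 0 and 1 receive 3 pages and nodes 2 and 3 receive 2 (3+3+2+2 = 10). The same
// split as a stand-alone helper (hypothetical name, sketch only):

#include <stddef.h>

static size_t sketch_pages_for_node(size_t pages, size_t numa_count, size_t numa_node) {
  size_t node_pages = pages / numa_count;            // the even share per node
  if (numa_node < pages % numa_count) node_pages++;  // the first `pages % numa_count` nodes take one extra
  return node_pages;
}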
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 93fb8b31..2415a40d 100644 --- a/src/os.c +++ b/src/os.c @@ -968,66 +968,61 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 -static int mi_os_numa_nodex() { +static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); GetNumaProcessorNodeEx(&pnum,&numa_node); - return (int)numa_node; + return numa_node; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { ULONG numa_max = 0; GetNumaHighestNodeNumber(&numa_max); - return (int)(numa_max + 1); + return (numa_max + 1); } #elif defined(__linux__) #include // getcpu #include // access -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { #ifdef SYS_getcpu - unsigned node = 0; - unsigned ncpu = 0; - int err = syscall(SYS_getcpu, &ncpu, &node, NULL); + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; - return (int)node; + return node; #else return 0; #endif } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { char buf[128]; - int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) - int node = 0; - for(node = 0; node < max_nodes; node++) { + unsigned node = 0; + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); if (access(buf,R_OK) != 0) break; } return (node+1); } #else -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { return 0; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { return 1; } #endif -int _mi_numa_node_count = 0; // cache the node count +size_t _mi_numa_node_count = 0; // cache the node count -int _mi_os_numa_node_count_get(void) { +size_t _mi_os_numa_node_count_get(void) { if (mi_unlikely(_mi_numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); - int ncount0 = ncount; - // never more than max numa node and at least 1 - int nmax = (int)mi_option_get(mi_option_max_numa_nodes); - if (ncount > nmax) ncount = nmax; - if (ncount <= 0) ncount = 1; - _mi_numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); + long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? + if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically + _mi_numa_node_count = (size_t)(ncount <= 0 ? 
1 : ncount); + _mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count); } mi_assert_internal(_mi_numa_node_count >= 1); return _mi_numa_node_count; @@ -1035,11 +1030,10 @@ int _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - int numa_node = mi_os_numa_nodex(); + size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; - return numa_node; + return (int)numa_node; } From 30e2c54adba9f1d2ef32e35e4e6c4b80e5732c26 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 14:13:02 -0800 Subject: [PATCH 046/104] remove delayed reset option (for now) --- include/mimalloc.h | 2 +- src/memory.c | 139 ++------------------------ src/options.c | 4 +- src/os.c | 237 +++++++++++++++++++++++++-------------------- 4 files changed, 142 insertions(+), 240 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 3c942849..a59b9cf7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -273,7 +273,7 @@ typedef enum mi_option_e { mi_option_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, - mi_option_reset_delay, + mi_option_reset_decommits, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index f3052d6b..b0bcf7a0 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,9 +53,6 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -// local -static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); - // Constants #if (MI_INTPTR_SIZE==8) @@ -354,8 +351,6 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, p, size); - size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; @@ -424,7 +419,6 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -434,142 +428,23 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } } -/* ---------------------------------------------------------------------------- - Delay slots ------------------------------------------------------------------------------*/ - -typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); - -static void mi_delay_insert(mi_delay_slots_t* ds, - mi_msecs_t delay, uint8_t* addr, size_t size, - mi_delay_resolve_fun* resolve, void* arg) -{ - if (ds == NULL || delay==0 || addr==NULL || size==0) { - resolve(addr, size, arg); - return; - } - - mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = &ds->slots[0]; - // walk through all slots, resolving expired ones. - // remember the oldest slot to insert the new entry in. 
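// Aside, for reading the removal around here: the delay-slot scheme being taken
// out kept a small fixed-size array of (expire, addr, size) entries so that an
// OS reset could be postponed by `reset_delay` milliseconds and cancelled again
// if the range was reused in time. A heavily condensed sketch of that idea
// (hypothetical names, and unlike the real code it drops an evicted entry
// instead of resolving it):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef struct sketch_slot_s { int64_t expire; uint8_t* addr; size_t size; } sketch_slot_t;
#define SKETCH_SLOT_COUNT 4

static void sketch_delay_insert(sketch_slot_t* slots, int64_t now, int64_t delay,
                                uint8_t* addr, size_t size) {
  sketch_slot_t* victim = &slots[0];
  for (size_t i = 0; i < SKETCH_SLOT_COUNT; i++) {
    if (slots[i].expire == 0) { victim = &slots[i]; break; }     // free slot, use it
    if (slots[i].expire < victim->expire) victim = &slots[i];    // otherwise remember the oldest
  }
  victim->expire = now + delay; victim->addr = addr; victim->size = size;
}

static bool sketch_delay_remove(sketch_slot_t* slots, uint8_t* addr, size_t size) {
  bool found = false;
  for (size_t i = 0; i < SKETCH_SLOT_COUNT; i++) {
    if (slots[i].expire != 0 && slots[i].addr <= addr &&
        addr + size <= slots[i].addr + slots[i].size) {
      slots[i].expire = 0;                                       // cancel the pending reset
      found = true;
    }
  }
  return found;
}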
- size_t newcount = 0; - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - - if (slot->expire == 0) { - // empty slot - oldest = slot; - } - // TODO: should we handle overlapping areas too? - else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses new area, increase expiration - slot->expire = now + delay; - delay = 0; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, overwrite - slot->expire = now + delay; - slot->addr = addr; - slot->size = size; - delay = 0; - } - else if (slot->expire < now) { - // expired slot, resolve now - slot->expire = 0; - resolve(slot->addr, slot->size, arg); - } - else if (oldest->expire > slot->expire) { - oldest = slot; - newcount = i+1; - } - else { - newcount = i+1; - } - } - ds->count = newcount; - if (delay>0) { - // not yet registered, use the oldest slot (or a new one if there is space) - if (ds->count < ds->capacity) { - oldest = &ds->slots[ds->count]; - ds->count++; - } - else if (oldest->expire > 0) { - resolve(oldest->addr, oldest->size, arg); // evict if not empty - } - mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); - oldest->expire = now + delay; - oldest->addr = addr; - oldest->size = size; - } -} - -static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) -{ - if (ds == NULL || p==NULL || size==0) return false; - - uint8_t* addr = (uint8_t*)p; - bool done = false; - size_t newcount = 0; - - // walk through all valid slots - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses the area; remove it - slot->expire = 0; - done = true; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, remove it - slot->expire = 0; - } - else if ((addr <= slot->addr && addr + size > slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap - // can happen with a large object spanning onto some partial end block - // mi_assert_internal(false); - slot->expire = 0; - } - else { - newcount = i + 1; - } - } - ds->count = newcount; - return done; -} - -static void mi_resolve_reset(void* p, size_t size, void* vtld) { - mi_os_tld_t* tld = (mi_os_tld_t*)vtld; - _mi_os_reset(p, size, tld->stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), - (uint8_t*)p, size, &mi_resolve_reset, tld); - return true; -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { - return _mi_os_unreset(p, size, is_zero, tld->stats); - } - return true; -} - - /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + return _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + return _mi_os_unreset(p, size, is_zero, tld->stats); +} bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) 
{ - mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 180f6a75..8c4c1707 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 2415a40d..02683a02 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512*KiB) align_size = _mi_os_page_size(); - else if (size < 2*MiB) align_size = 64*KiB; - else if (size < 8*MiB) align_size = 256*KiB; - else if (size < 32*MiB) align_size = 1*MiB; - else align_size = 4*MiB; + if (size < 512 * KiB) align_size = _mi_os_page_size(); + else if (size < 2 * MiB) align_size = 64 * KiB; + else if (size < 8 * MiB) align_size = 256 * KiB; + else if (size < 32 * MiB) align_size = 1 * MiB; + else align_size = 4 * MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. 
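(Aside: a minimal standalone sketch of the size-rounding policy in `_mi_os_good_alloc_size` above. The bucket thresholds are copied from the hunk; `KiB`/`MiB` and the 4 KiB page size are assumptions of the sketch, not part of the patch.)

#include <stdio.h>
#include <stdint.h>

#define KiB ((size_t)1024)
#define MiB (KiB*KiB)

// mirrors the bucket logic of _mi_os_good_alloc_size, assuming a 4 KiB OS page
static size_t good_alloc_size(size_t size) {
  size_t align_size;
  if (size < 512*KiB)      align_size = 4*KiB;    // _mi_os_page_size()
  else if (size < 2*MiB)   align_size = 64*KiB;
  else if (size < 8*MiB)   align_size = 256*KiB;
  else if (size < 32*MiB)  align_size = 1*MiB;
  else                     align_size = 4*MiB;
  if (size >= (SIZE_MAX - align_size)) return size;             // possible overflow
  return ((size + align_size - 1) / align_size) * align_size;   // _mi_align_up
}

int main(void) {
  // worst-case waste sits just above a threshold, e.g. 32MiB+1 -> 36MiB (~11% <= 12.5%)
  printf("%zu\n", good_alloc_size(100*KiB));   // 102400 (already page aligned)
  printf("%zu\n", good_alloc_size(32*MiB+1));  // 37748736 (36 MiB)
  return 0;
}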
#include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok!=0); + return (ok != 0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2*MiB; + large_os_page_size = 2 * MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). 
if (p == NULL) { - mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); + *is_large = ((flags & MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); - size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); + size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,47 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) +#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; // fall back to regular mmap } - #else +#else UNUSED(try_alignment); - #endif - if (p==NULL) { - p = mmap(addr,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; +#endif + if (p == NULL) { + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#if !defined(MAP_ANONYMOUS) +#define MAP_ANONYMOUS MAP_ANON +#endif +#if !defined(MAP_NORESERVE) +#define MAP_NORESERVE 0 +#endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; - #if defined(MAP_ALIGNED) // BSD +#if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); } } - #endif - #if defined(PROT_MAX) +#endif +#if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) +#endif +#if defined(VM_MAKE_TAG) +// macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); - #endif +#endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -332,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; - #ifdef MAP_ALIGNED_SUPER +#ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB +#endif +#ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB +#endif +#ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else - #endif +#endif { - #ifdef MAP_HUGE_2MB +#ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; - #endif +#endif } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB +#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif +#endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB +#ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } - #endif +#endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -375,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - #if defined(MADV_HUGEPAGE) +#if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -387,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } - #endif +#endif } return p; } @@ -401,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size%MI_SEGMENT_SIZE) != 0) return NULL; + if ((size % MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB - #endif + init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB +#endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint%try_alignment != 0) return NULL; + if (hint % try_alignment != 0) return NULL; return (void*)hint; } #else @@ -441,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif +#if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); +#elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); +#else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); +#endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -561,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); } @@ -613,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } - #if defined(_WIN32) +#if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -624,28 +627,42 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } - #elif defined(__wasi__) +#elif defined(__wasi__) // WebAssembly guests can't control memory protection - #else +#elif defined(MAP_FIXED) + if (!commit) { + // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + if (p != start) { err = errno; } + } + else { + // for commit, just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } +#else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } - #endif +#endif if (err != 0) { - _mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } mi_assert_internal(err == 0); return (err == 0); } bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats); + return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); + return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); +} // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -657,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
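(Aside: the new MAP_FIXED branch above over-maps the range with PROT_NONE to drop the commit charge while keeping the address range reserved, and re-commits with `mprotect`. A minimal standalone POSIX sketch of that idea follows; Linux is assumed for MAP_NORESERVE, and this is an illustration, not the patched function.)

#include <sys/mman.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  const size_t size = 1 << 20;   // 1 MiB, a multiple of the page size
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { perror("mmap"); return 1; }
  memset(p, 0xAB, size);         // commit charge is now attributed to this range

  // "decommit": discard contents and drop the commit charge, keep the addresses reserved
  void* q = mmap(p, size, PROT_NONE,
                 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (q != p) { perror("mmap(MAP_FIXED)"); return 1; }

  // "commit" again: just change the protection; pages come back zeroed on demand
  if (mprotect(p, size, PROT_READ | PROT_WRITE) != 0) { perror("mprotect"); return 1; }
  printf("first byte after recommit: %d\n", ((unsigned char*)p)[0]);  // prints 0
  return 0;
}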
- #if (MI_DEBUG>1) - if (MI_SECURE==0) { +#if (MI_DEBUG>1) + if (MI_SECURE == 0) { memset(start, 0, csize); // pretend it is eagerly reset } - #endif +#endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); - #if 1 +#if 1 if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } - #endif +#endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -704,12 +721,22 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - return mi_os_resetx(addr, size, true, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_decommit(addr, size, stats); + } + else { + return mi_os_resetx(addr, size, true, stats); + } } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) + } + else { + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); + } } @@ -721,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -753,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); + mi_assert_internal(oldsize > newsize&& p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -781,20 +808,20 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif +#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE +#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) +#endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -821,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - #endif +#endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -842,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
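(Aside: `mbind` takes a node bitmask rather than a node id, which is why the code above builds `numa_mask` from the node number. A small sketch of preferring one NUMA node for a fresh mapping; it assumes Linux with libnuma's <numaif.h> available and linking with -lnuma, whereas the patch issues the raw syscall through its own `mi_os_mbind` wrapper.)

#include <numaif.h>     // mbind, MPOL_PREFERRED (libnuma)
#include <sys/mman.h>
#include <stdio.h>

int main(void) {
  const size_t size = 2UL*1024*1024;             // one 2 MiB block for the sketch
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { perror("mmap"); return 1; }

  int node = 0;                                  // preferred NUMA node (assumption)
  unsigned long mask = 1UL << node;              // node bitmask, like `numa_mask` above
  // 8*sizeof(mask) == maximum number of nodes representable in the mask (64 here)
  if (mbind(p, size, MPOL_PREFERRED, &mask, 8*sizeof(mask), 0) != 0) {
    perror("mbind");                             // non-fatal, comparable to the warning above
  }
  return 0;
}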
// see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -883,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -936,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); + if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -947,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? NULL : start); @@ -956,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p==NULL || size==0) return; + if (p == NULL || size == 0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -972,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum,&numa_node); + GetNumaProcessorNodeEx(&pnum, &numa_node); return numa_node; } @@ -999,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for(node = 0; node < 256; node++) { + for (node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; + if (access(buf, R_OK) != 0) break; } - return (node+1); + return (node + 1); } #else static size_t mi_os_numa_nodex(void) { @@ -1031,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } From 211f1aa5190f063ee8eef237473281535c2be79f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 14:55:12 -0800 Subject: [PATCH 047/104] remove reset delay slots; add reset tracking per page and segment --- include/mimalloc-internal.h | 8 +- include/mimalloc-types.h | 28 +--- include/mimalloc.h | 3 +- src/arena.c | 8 +- src/bitmap.inc.c | 54 ++++++-- src/init.c | 11 +- src/memory.c | 199 +++++++++++++++------------ src/options.c | 5 +- src/os.c | 204 ++++++++++++++-------------- src/page.c | 7 +- src/segment.c | 264 ++++++++++++++++++++++-------------- 11 files changed, 443 insertions(+), 348 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d727e563..ab295e65 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -59,7 +59,7 @@ size_t _mi_os_good_alloc_size(size_t size); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); @@ -75,7 +75,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; @@ -297,7 +297,9 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page->block_size, page_size); + const size_t bsize = page->block_size; + mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); + return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } // Get the page containing the pointer diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0ce91339..e816c3a6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -384,31 +384,12 @@ void 
_mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) - -// ------------------------------------------------------ -// Delay slots (to avoid expensive OS calls) -// ------------------------------------------------------ -typedef int64_t mi_msecs_t; - -#define MI_RESET_DELAY_SLOTS (256) - -typedef struct mi_delay_slot_s { - mi_msecs_t expire; - uint8_t* addr; - size_t size; -} mi_delay_slot_t; - -typedef struct mi_delay_slots_s { - size_t capacity; // always `MI_RESET_DELAY_SLOTS` - size_t count; // current slots used (`<= capacity`) - mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS]; -} mi_delay_slots_t; - - // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ +typedef int64_t mi_msecs_t; + // Queue of segments typedef struct mi_segment_queue_s { mi_segment_t* first; @@ -417,9 +398,8 @@ typedef struct mi_segment_queue_s { // OS thread local data typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_delay_slots_t* reset_delay; // delay slots for OS reset operations - mi_stats_t* stats; // points to tld stats + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats } mi_os_tld_t; // Segments thread local data diff --git a/include/mimalloc.h b/include/mimalloc.h index a59b9cf7..197b1734 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -272,8 +272,9 @@ typedef enum mi_option_e { mi_option_segment_cache, mi_option_page_reset, mi_option_segment_reset, - mi_option_eager_commit_delay, mi_option_reset_decommits, + mi_option_eager_commit_delay, + mi_option_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/arena.c b/src/arena.c index 46741208..4a596b2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -107,7 +107,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around - if (mi_bitmap_try_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { + if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; } @@ -137,9 +137,9 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n } else if (commit) { // ensure commit now - bool any_zero; - mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_zero); - if (any_zero) { + bool any_uncommitted; + mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); if (commit_zero) *is_zero = true; diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 81f87a79..11ada472 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -104,9 +104,29 @@ static inline size_t mi_bsr(uintptr_t x) { Claim a bit sequence atomically ----------------------------------------------------------- */ +// Try to atomically claim a sequence of `count` bits at in `idx` +// in the bitmap field. 
Returns `true` on success. +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_fields, const size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); + + mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if ((field & mask) == 0) { // free? + if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { + // claimed! + return true; + } + } + return false; +} + + // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); volatile _Atomic(uintptr_t)* field = &bitmap[idx]; @@ -160,9 +180,9 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { for (size_t idx = 0; idx < bitmap_fields; idx++) { - if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) { + if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } @@ -170,39 +190,51 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, } // Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously +// Returns `true` if all `count` bits were 1 previously. static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - mi_assert_internal((bitmap[idx] & mask) == mask); + // mi_assert_internal((bitmap[idx] & mask) == mask); uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); return ((prev & mask) == mask); } // Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
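(Aside: the claim/unclaim pair here reduces to one atomic OR or AND plus a check of the previous value. A self-contained C11 sketch of the same pattern on a single 64-bit bitmap field; the mask helper and names are the sketch's own, not the patch API.)

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include <assert.h>

// mask of `count` consecutive bits starting at `bitidx` (like mi_bitmap_mask_)
static inline uint64_t bit_mask(size_t count, size_t bitidx) {
  if (count == 64) return ~UINT64_C(0);
  return ((UINT64_C(1) << count) - 1) << bitidx;
}

// set the bits; returns true if all of them were 0 before (i.e. we are the claimer)
static bool claim(_Atomic uint64_t* field, size_t count, size_t bitidx, bool* any_zero) {
  uint64_t mask = bit_mask(count, bitidx);
  uint64_t prev = atomic_fetch_or(field, mask);
  if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
  return ((prev & mask) == 0);
}

// clear the bits; returns true if all of them were 1 before
static bool unclaim(_Atomic uint64_t* field, size_t count, size_t bitidx) {
  uint64_t mask = bit_mask(count, bitidx);
  uint64_t prev = atomic_fetch_and(field, ~mask);
  return ((prev & mask) == mask);
}

int main(void) {
  _Atomic uint64_t field = 0;
  bool any_zero;
  assert(claim(&field, 4, 8, &any_zero) && any_zero);    // bits 8..11 were free
  assert(!claim(&field, 4, 8, &any_zero) && !any_zero);  // a second claim sees them taken
  assert(unclaim(&field, 4, 8));                         // and all were still set when freed
  return 0;
}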
static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == 0); + //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } -// Returns `true` if all `count` bits were 1 -static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. +static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == 0); - return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); + mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if (any_ones != NULL) *any_ones = ((field & mask) != 0); + return ((field & mask) == mask); } +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} + + #endif diff --git a/src/init.c b/src/init.c index f9735462..468fd46f 100644 --- a/src/init.c +++ b/src/init.c @@ -97,13 +97,11 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) -static mi_delay_slots_t tld_reset_delay_main = { MI_RESET_DELAY_SLOTS, 0, { {0,NULL,0} } }; - static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, &tld_reset_delay_main, tld_main_stats }, // os + { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -194,8 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; - mi_delay_slots_t reset_delay; + mi_tld_t tld; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -215,7 +212,6 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; - mi_delay_slots_t* reset_delay = &td->reset_delay; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); heap->random = _mi_random_init(heap->thread_id); @@ -226,9 +222,6 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = 
&tld->stats; - tld->os.reset_delay = reset_delay; - memset(reset_delay, 0, sizeof(*reset_delay)); - reset_delay->capacity = MI_RESET_DELAY_SLOTS; _mi_heap_set_default_direct(heap); } return false; diff --git a/src/memory.c b/src/memory.c index b0bcf7a0..94b6348f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -54,6 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); + // Constants #if (MI_INTPTR_SIZE==8) #define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map @@ -73,28 +74,26 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Region info is a pointer to the memory region and two bits for // its flags: is_large, and is_committed. -typedef uintptr_t mi_region_info_t; - -static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { - return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); -} - -static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { - if (is_large) *is_large = ((info&0x02) != 0); - if (is_committed) *is_committed = ((info&0x01) != 0); - return (void*)(info & ~0x03); -} +typedef union mi_region_info_u { + uintptr_t value; + struct { + bool valid; + bool is_large; + int numa_node; + }; +} mi_region_info_t; // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) - volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association) + volatile _Atomic(void*) start; // start of the memory area (and flags) mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) - size_t arena_memid; // if allocated from a (huge page) arena + mi_bitmap_field_t reset; // track reset per block + volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena- } mem_region_t; // The region map @@ -113,24 +112,32 @@ static size_t mi_region_block_count(size_t size) { return _mi_divide_up(size, MI_SEGMENT_SIZE); } +/* // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. static size_t mi_good_commit_size(size_t size) { if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; return _mi_align_up(size, _mi_os_large_page_size()); } +*/ // Return if a pointer points into a region reserved by us. 
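(Aside: the `mi_region_info_u` union above packs the region flags and NUMA node into a single word so they can be published with one atomic write. A small sketch of that publish/read pattern; it assumes a 64-bit `uintptr_t` so the struct fits in one word, and the names mirror the patch but the snippet is standalone.)

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef union region_info_u {
  uintptr_t value;
  struct {
    bool valid;
    bool is_large;
    int  numa_node;
  };
} region_info_t;

static _Atomic uintptr_t g_info;     // 0 means "not initialized yet"

static void publish(bool is_large, int numa_node) {
  region_info_t info;
  info.value = 0;                    // clear padding so `value` is deterministic
  info.valid = true;
  info.is_large = is_large;
  info.numa_node = numa_node;
  atomic_store(&g_info, info.value); // one write makes all fields visible together
}

int main(void) {
  publish(false, 1);
  region_info_t info;
  info.value = atomic_load(&g_info);
  if (info.value != 0) {             // same "initialized at all?" check as in the patch
    printf("valid=%d large=%d numa=%d\n", info.valid, info.is_large, info.numa_node);
  }
  return 0;
}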
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; size_t count = mi_atomic_read_relaxed(®ions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(®ions[i].info), NULL, NULL); + uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(®ions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; } +static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { + void* start = mi_atomic_read_ptr(®ion->start); + mi_assert_internal(start != NULL); + return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE)); +} + static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); size_t idx = region - regions; @@ -142,13 +149,10 @@ static size_t mi_memid_create_from_arena(size_t arena_memid) { return (arena_memid << 1) | 1; } -static bool mi_memid_is_arena(size_t id) { - return ((id&1)==1); -} -static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if (mi_memid_is_arena(id)) { - *arena_memid = (id>>1); +static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { + if ((id&1)==1) { + if (arena_memid != NULL) *arena_memid = (id>>1); return true; } else { @@ -159,6 +163,7 @@ static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t } } + /* ---------------------------------------------------------------------------- Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ @@ -187,16 +192,21 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // allocated, initialize and claim the initial blocks mem_region_t* r = ®ions[idx]; - r->numa_node = _mi_os_numa_node(tld) + 1; - r->arena_memid = arena_memid; + r->arena_memid = arena_memid; mi_atomic_write(&r->in_use, 0); mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); + mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_write_ptr(&r->start, start); // and share it - mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others + mi_region_info_t info; + info.valid = true; + info.is_large = region_large; + info.numa_node = _mi_os_numa_node(tld); + mi_atomic_write(&r->info, info.value); // now make it available to others *region = r; return true; } @@ -207,36 +217,33 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? 
- mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - if (info==0) return false; + mi_region_info_t info; + info.value = mi_atomic_read_relaxed(®ion->info); + if (info.value==0) return false; // numa correct if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + int rnode = info.numa_node; if (rnode >= 0 && rnode != numa_node) return false; } // check allow-large - bool is_large; - bool is_committed; - mi_region_info_read(info, &is_large, &is_committed); - if (!allow_large && is_large) return false; + if (!allow_large && info.is_large) return false; return true; } -static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // try all regions for a free slot - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + // try all regions for a free slot const size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; if (mi_region_is_suitable(r, numa_node, allow_large)) { - if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { + if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; return true; @@ -252,8 +259,9 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); mem_region_t* region; mi_bitmap_index_t bit_idx; - // first try to claim in existing regions - if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) { + const int numa_node = (_mi_os_numa_node_count() <= 1 ? 
-1 : _mi_os_numa_node(tld)); + // try to claim in existing regions + if (!mi_region_try_claim(numa_node, blocks, *is_large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory @@ -261,30 +269,28 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo } } + // found a region and claimed `blocks` at `bit_idx` mi_assert_internal(region != NULL); mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - mi_region_info_t info = mi_atomic_read(®ion->info); - bool region_is_committed = false; - bool region_is_large = false; - void* start = mi_region_info_read(info, ®ion_is_large, ®ion_is_committed); - mi_assert_internal(!(region_is_large && !*is_large)); + mi_region_info_t info; + info.value = mi_atomic_read(®ion->info); + void* start = mi_atomic_read_ptr(®ion->start); + mi_assert_internal(!(info.is_large && !*is_large)); mi_assert_internal(start != NULL); - *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *is_large = region_is_large; + *is_zero = mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); + *is_large = info.is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - if (region_is_committed) { - // always committed - *commit = true; - } - else if (*commit) { + + // commit + if (*commit) { // ensure commit - bool any_zero; - mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_zero); - if (any_zero) { + bool any_uncommitted; + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); + if (any_uncommitted) { bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -294,6 +300,21 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // no need to commit, but check if already fully committed *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); } + mi_assert_internal(mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); + + // unreset reset blocks + if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); + mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); + bool reset_zero; + _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); + if (reset_zero) *is_zero = true; + } + mi_assert_internal(!mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); + + #if (MI_DEBUG>=2) + if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif // and return the allocation mi_assert_internal(p != NULL); @@ -325,7 +346,9 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); if (p != NULL) { + #if (MI_DEBUG>=2) if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif return p; } _mi_warning_message("unable to allocate from region: size %zu\n", size); @@ -346,56 +369,56 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
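(Aside: the memory ids returned above and decoded by `_mi_mem_free` below reserve the low bit as an arena tag: `(arena_memid << 1) | 1` for direct arena allocations and an even value for region allocations. A tiny sketch of that encode/decode round trip; the region index packing is simplified here, since the patch combines region and bit index.)

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

// arena allocations: odd id, payload in the upper bits (as in mi_memid_create_from_arena)
static size_t memid_from_arena(size_t arena_memid) { return (arena_memid << 1) | 1; }

// region allocations: even id (the patch packs the region index and bit index here)
static size_t memid_from_region(size_t region_idx) { return (region_idx << 1); }

// returns true for arena ids and extracts the payload, mirroring mi_memid_is_arena
static bool memid_is_arena(size_t id, size_t* arena_memid, size_t* region_idx) {
  if ((id & 1) == 1) { *arena_memid = id >> 1; return true;  }
  else               { *region_idx  = id >> 1; return false; }
}

int main(void) {
  size_t a, r;
  assert( memid_is_arena(memid_from_arena(42), &a, &r) && a == 42);
  assert(!memid_is_arena(memid_from_region(7), &a, &r) && r == 7);
  return 0;
}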
-void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { +void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; - + size = _mi_align_up(size, _mi_os_page_size()); + size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; - if (mi_memid_indices(id,®ion,&bit_idx,&arena_memid)) { + if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - // we can align the size up to page size (as we allocate that way too) - // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); const size_t blocks = mi_region_block_count(size); - mi_region_info_t info = mi_atomic_read(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); + mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); + mi_region_info_t info; + info.value = mi_atomic_read(®ion->info); + mi_assert_internal(info.value != 0); + void* blocks_start = mi_region_blocks_start(region, bit_idx); mi_assert_internal(blocks_start == p); // not a pointer in our area? mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - // decommit (or reset) the blocks to reduce the working set. - // TODO: implement delayed decommit/reset as these calls are too expensive - // if the memory is reused soon. - // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large && - mi_option_is_enabled(mi_option_segment_reset) && - mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead - { - // note: don't use `_mi_mem_reset` as it is shared with other threads! - _mi_os_reset(p, size, tld->stats); // TODO: maintain reset bits to unreset - } - if (!is_committed) { - // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); + // committed? + if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); } - // TODO: should we free empty regions? currently only done _mi_mem_collect. - // this frees up virtual address space which might be useful on 32-bit systems? + if (any_reset) { + // set the is_reset bits if any pages were reset + mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); + } + + // reset the blocks to reduce the working set. 
+ if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) && + mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use only `option_page_reset` instead + { + bool any_unreset; + mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); + if (any_unreset) { + _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); + } + } // and unclaim - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed); } } @@ -416,13 +439,14 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region - bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); + void* start = mi_atomic_read_ptr(®ions[i].start); + size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid); + memset(®ions[i], 0, sizeof(mem_region_t)); + // and release the whole region + mi_atomic_write(®ion->info, 0); + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); } - // and release - mi_atomic_write(®ion->info,0); } } } @@ -432,6 +456,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ + bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { return _mi_os_reset(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 8c4c1707..9b6e4cd0 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 +65,11 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit + { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 02683a02..553d72c9 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512 * KiB) align_size = _mi_os_page_size(); - else if (size < 2 * MiB) align_size = 64 * KiB; - else if (size < 8 * MiB) align_size = 256 * KiB; - else if (size < 32 * MiB) align_size = 1 * MiB; - else align_size = 4 * MiB; + if (size < 512*KiB) align_size = _mi_os_page_size(); + else if (size < 2*MiB) align_size = 64*KiB; + else if (size < 8*MiB) align_size = 256*KiB; + else if (size < 32*MiB) align_size = 1*MiB; + else align_size = 4*MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. #include -typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok != 0); + return (ok!=0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2 * MiB; + large_os_page_size = 2*MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; 
if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). if (p == NULL) { - mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags & MEM_LARGE_PAGES) != 0); + *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); - size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); + size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,50 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; -#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p == MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; // fall back to regular mmap } -#else + #else UNUSED(try_alignment); -#endif - if (p == NULL) { - p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p == MAP_FAILED) p = NULL; + #endif + if (p==NULL) { + p = mmap(addr,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; -#if !defined(MAP_ANONYMOUS) -#define MAP_ANONYMOUS MAP_ANON -#endif -#if !defined(MAP_NORESERVE) -#define MAP_NORESERVE 0 -#endif + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; -#if defined(MAP_ALIGNED) // BSD + #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= 
MAP_ALIGNED(n); } } -#endif -#if defined(PROT_MAX) + #endif + #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD -#endif -#if defined(VM_MAKE_TAG) -// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + #endif + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); -#endif + #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -335,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; -#ifdef MAP_ALIGNED_SUPER + #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; -#endif -#ifdef MAP_HUGETLB + #endif + #ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; -#endif -#ifdef MAP_HUGE_1GB + #endif + #ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else -#endif + #endif { -#ifdef MAP_HUGE_2MB + #ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; -#endif + #endif } -#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; -#endif + #endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); -#ifdef MAP_HUGE_1GB + #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } -#endif + #endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -378,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); -#if defined(MADV_HUGEPAGE) + #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
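Stripped of the #ifdef maze, the large-page path above boils down to: try an explicit huge-page mapping first and silently fall back to regular pages. A minimal Linux-flavored sketch of just the 2MiB step (illustration only; the flags are guarded because not every libc exposes them):

#include <sys/mman.h>
#include <stddef.h>

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

static void* alloc_prefer_2mib(size_t size) {
  const int base = MAP_PRIVATE | MAP_ANONYMOUS;
  void* p = MAP_FAILED;
#if defined(MAP_HUGETLB) && defined(MAP_HUGE_2MB)
  p = mmap(NULL, size, PROT_READ | PROT_WRITE, base | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
#endif
  if (p == MAP_FAILED) {
    p = mmap(NULL, size, PROT_READ | PROT_WRITE, base, -1, 0);  // fall back to regular pages
  }
  return (p == MAP_FAILED ? NULL : p);
}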
It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -390,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } -#endif + #endif } return p; } @@ -404,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size % MI_SEGMENT_SIZE) != 0) return NULL; + if ((size%MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB -#endif + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint % try_alignment != 0) return NULL; + if (hint%try_alignment != 0) return NULL; return (void*)hint; } #else @@ -444,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ -#if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); -#elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); -#else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); -#endif + #if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + #elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); + #else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + #endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -564,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); } @@ -616,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } -#if defined(_WIN32) + #if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -627,9 +627,9 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } -#elif defined(__wasi__) + #elif defined(__wasi__) // WebAssembly guests can't control memory protection -#elif defined(MAP_FIXED) + #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); @@ -640,10 +640,10 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } } -#else + #else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } -#endif + #endif if (err != 0) { _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } @@ -674,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! -#if (MI_DEBUG>1) - if (MI_SECURE == 0) { + #if (MI_DEBUG>1) + if (MI_SECURE==0) { memset(start, 0, csize); // pretend it is eagerly reset } -#endif + #endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); -#if 1 + #if 1 if (p == start && start != NULL) { - VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } -#endif + #endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -748,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -780,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize&& p != NULL); + mi_assert_internal(oldsize > newsize && p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -808,20 +808,20 @@ and possibly associated with a specific NUMA node. 
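On the POSIX side, the commit/decommit code above uses a pattern worth spelling out: decommitting replaces the range with a fresh PROT_NONE mapping (which drops the commit charge and the old contents), and recommitting is just an mprotect back to read/write. A minimal sketch under those assumptions (page-aligned range, Linux/BSD-style mmap):

#include <sys/mman.h>
#include <stdbool.h>
#include <stddef.h>

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#ifndef MAP_NORESERVE
#define MAP_NORESERVE 0
#endif

// decommit: MAP_FIXED atomically replaces the old pages with inaccessible ones
static bool decommit(void* p, size_t size) {
  void* r = mmap(p, size, PROT_NONE,
                 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  return (r == p);
}

// recommit: make the range accessible again (contents are now zero, not the old data)
static bool recommit(void* p, size_t size) {
  return (mprotect(p, size, PROT_READ | PROT_WRITE) == 0);
}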
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size % GiB == 0); + mi_assert_internal(size%GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { -#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE -#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) -#endif + #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE + #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) + #endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -848,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } -#endif + #endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -869,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size % GiB == 0); + mi_assert_internal(size%GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
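For readers unfamiliar with mbind: the node mask is simply a bitmask with the preferred node's bit set, and issuing the raw syscall avoids a dependency on libnuma. A hedged, Linux-only sketch of the same call (MPOL_PREFERRED is assumed to be 1, as in <numaif.h>):

#include <sys/syscall.h>
#include <unistd.h>
#include <stddef.h>

#ifndef MPOL_PREFERRED
#define MPOL_PREFERRED 1
#endif

// prefer (but do not require) placing `p .. p+size` on `numa_node`; assumes numa_node < 64
static long bind_preferred(void* p, size_t size, int numa_node) {
  unsigned long mask = (1UL << numa_node);
  return syscall(SYS_mbind, p, size, MPOL_PREFERRED, &mask, sizeof(mask)*8, 0);
}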
// see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -910,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -963,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); - if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -974,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? NULL : start); @@ -983,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p == NULL || size == 0) return; + if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -999,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum, &numa_node); + GetNumaProcessorNodeEx(&pnum,&numa_node); return numa_node; } @@ -1026,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for (node = 0; node < 256; node++) { + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
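To make the early-abort estimate in the `_mi_os_alloc_huge_os_pages` hunk above concrete: if 16 pages are requested and the first 4 already took 1200ms, the projected total is (1200/4)*16 = 4800ms; with max_msecs at 2000 this exceeds 2*2000, so elapsed is bumped past the deadline and the loop stops, returning only the pages reserved so far.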
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf, R_OK) != 0) break; + if (access(buf,R_OK) != 0) break; } - return (node + 1); + return (node+1); } #else static size_t mi_os_numa_nodex(void) { @@ -1058,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } diff --git a/src/page.c b/src/page.c index 9085ccb5..df6ecc71 100644 --- a/src/page.c +++ b/src/page.c @@ -75,7 +75,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -229,6 +229,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_assert_internal(!page->is_reset); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); @@ -342,7 +343,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - + #if MI_DEBUG > 1 mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); #endif @@ -597,7 +598,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size); + _mi_segment_page_start(segment, page, block_size, &page_size, NULL); page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); diff --git a/src/segment.c b/src/segment.c index 549dd339..ffba8c0d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,6 +13,8 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); + /* ----------------------------------------------------------- Segment allocation We allocate pages inside big OS allocated "segments" @@ -40,7 +42,6 @@ terms of the MIT license. 
A copy of the license can be found in the file Queue of segments containing free pages ----------------------------------------------------------- */ - #if (MI_DEBUG>=3) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_internal(segment != NULL); @@ -143,31 +144,50 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { } #endif + +/* ----------------------------------------------------------- + Page reset +----------------------------------------------------------- */ + +static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + size_t psize; + void* start = mi_segment_raw_page_start(segment, page, &psize); + page->is_reset = true; + mi_assert_internal(size <= psize); + _mi_mem_reset(start, ((size == 0 || size > psize) ? psize : size), tld->os); +} + +static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) +{ + mi_assert_internal(page->is_reset); + mi_assert_internal(!segment->mem_is_fixed); + page->is_reset = false; + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + _mi_mem_unreset(start, ((size == 0 || size > psize) ? psize : size), &is_zero, tld->os); + if (is_zero) page->is_zero_init = true; +} + + /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ -// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) -{ +// Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +// The raw start is not taking aligned block allocation into consideration. +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { size_t psize = (segment->page_kind == MI_PAGE_HUGE ? 
segment->segment_size : (size_t)1 << segment->page_shift); - uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; + uint8_t* p = (uint8_t*)segment + page->segment_idx * psize; if (page->segment_idx == 0) { // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; + p += segment->segment_info_size; psize -= segment->segment_info_size; - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { - size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - } - mi_assert_internal((uintptr_t)p % block_size == 0); - } } - + if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page @@ -175,19 +195,36 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(_mi_ptr_page(p) == page); + mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { - /* - if (mi_option_is_enabled(mi_option_secure)) { - // always reserve maximally so the protection falls on - // the same address area, as we need to reuse them from the caches interchangably. - capacity = MI_SMALL_PAGES_PER_SEGMENT; +// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size) +{ + size_t psize; + uint8_t* p = mi_segment_raw_page_start(segment, page, &psize); + if (pre_size != NULL) *pre_size = 0; + if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + size_t adjust = block_size - ((uintptr_t)p % block_size); + if (adjust < block_size) { + p += adjust; + psize -= adjust; + if (pre_size != NULL) *pre_size = adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); } - */ + + if (page_size != NULL) *page_size = psize; + mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_segment(p) == segment); + return p; +} + +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +{ const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; size_t guardsize = 0; size_t isize = 0; @@ -234,7 +271,15 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->os); + + bool fully_committed = true; + bool any_reset = false; + for (size_t i = 0; i < segment->capacity; i++) { + const mi_page_t* page = &segment->pages[i]; + if (!page->is_committed) fully_committed = false; + if (page->is_reset) any_reset = true; + } + _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -275,7 +320,7 @@ static bool 
mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } @@ -328,31 +373,31 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind >= MI_PAGE_LARGE); - bool protection_still_good = false; + bool pages_still_good = false; bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = NULL; // mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); - if (segment->page_kind != page_kind) { + if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { + pages_still_good = true; + } + else + { + // different page kinds; unreset any reset pages, and unprotect + // TODO: optimize cache pop to return fitting pages if possible? + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + } + } + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + // TODO: should we unprotect per page? (with is_protected flag?) 
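The cache handling above is easier to follow when one keeps in mind that the per-thread segment cache is conceptually just a small, bounded, intrusive free list. A sketch of that idea with hypothetical names (not mimalloc's actual structures):

#include <stdbool.h>
#include <stddef.h>

typedef struct seg_s {
  struct seg_s* next;
  size_t size;
} seg_t;

typedef struct seg_cache_s {
  seg_t* first;
  size_t count;
  size_t max_count;     // bound, e.g. derived from an option
} seg_cache_t;

static bool cache_push(seg_cache_t* c, seg_t* s) {
  if (c->count >= c->max_count) return false;     // cache full: caller frees to the OS instead
  s->next = c->first; c->first = s; c->count++;
  return true;
}

static seg_t* cache_pop(seg_cache_t* c, size_t size) {
  seg_t* s = c->first;
  if (s == NULL || s->size != size) return NULL;  // only hand out exact-size segments
  c->first = s->next; c->count--; s->next = NULL;
  return s;
}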
_mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs } - else { - protection_still_good = true; // otherwise, the guard pages are still in place - } - } - if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { - mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); - segment->mem_is_committed = true; - } - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_page_reset)) { - bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); - if (reset_zero) is_zero = true; - } + } } else { // Allocate the segment from the OS @@ -373,27 +418,42 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info (but not the `mem` fields) - ptrdiff_t ofs = offsetof(mi_segment_t,next); - memset((uint8_t*)segment + ofs, 0, info_size - ofs); - - // guard pages - if ((MI_SECURE != 0) && !protection_still_good) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); - size_t os_page_size = _mi_os_page_size(); - if (MI_SECURE <= 1) { - // and protect the last page too - _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); - } - else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); + if (!pages_still_good) { + // guard pages + if (MI_SECURE != 0) { + // in secure mode, we set up a protected page in between the segment info + // and the page data + mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); + _mi_mem_protect((uint8_t*)segment + info_size, (pre_size - info_size)); + const size_t os_page_size = _mi_os_page_size(); + if (MI_SECURE <= 1) { + // and protect the last page too + _mi_mem_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + } + else { + // protect every page + for (size_t i = 0; i < capacity; i++) { + _mi_mem_protect((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size); + } } } + + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); + + // initialize pages info + for (uint8_t i = 0; i < capacity; i++) { + segment->pages[i].segment_idx = i; + segment->pages[i].is_reset = false; + segment->pages[i].is_committed = commit; + segment->pages[i].is_zero_init = is_zero; + } + } + else { + // zero the segment info but not the pages info (and mem fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, offsetof(mi_segment_t,pages) - ofs); } // initialize @@ -404,13 +464,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - for (uint8_t i = 0; i < segment->capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; - segment->pages[i].is_zero_init = is_zero; - } _mi_stat_increase(&tld->stats->page_committed, 
segment->segment_info_size); + //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } @@ -463,24 +518,22 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { - if (page->is_reset || !page->is_committed) { + // set in-use before doing unreset to prevent delayed reset + page->segment_in_use = true; + segment->used++; + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!page->is_reset); size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_committed = true; - bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,tld->os); - if (is_zero) page->is_zero_init = true; - } - if (page->is_reset) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_reset = false; - bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, tld->os); - if (is_zero) page->is_zero_init = true; - } + uint8_t* start = _mi_page_start(segment, page, &psize); + page->is_committed = true; + bool is_zero = false; + _mi_mem_commit(start,psize,&is_zero,tld->os); + if (is_zero) page->is_zero_init = true; } + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? + } return page; } } @@ -503,22 +556,21 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - // reset the page memory to reduce memory pressure? - if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) - // && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets - { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - page->is_reset = true; - _mi_mem_reset(start, psize, tld->os); - } + // calculate the used size from the raw (non-aligned) start of the page + size_t pre_size; + _mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); + size_t used_size = pre_size + (page->capacity * page->block_size); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields page->is_zero_init = false; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; segment->used--; + + // reset the page memory to reduce memory pressure? 
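The offsetof/memset idiom used above (for both the segment info and the page) zeroes a struct only from a chosen member onward, so earlier fields such as the OS memory bookkeeping survive the clear. In isolation the pattern looks like this (hypothetical struct, illustration only):

#include <stddef.h>
#include <string.h>

typedef struct item_s {
  void*  mem;         // preserved across a clear
  size_t mem_size;    // preserved as well
  size_t capacity;    // everything from here on is zeroed
  size_t used;
  void*  data;
} item_t;

static void item_clear(item_t* it) {
  const ptrdiff_t ofs = offsetof(item_t, capacity);
  memset((unsigned char*)it + ofs, 0, sizeof(*it) - ofs);
}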
+ // note: must come after setting `segment_in_use` to false + mi_page_reset(segment, page, used_size, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -568,7 +620,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the segment from the free page queue if needed mi_segment_remove_from_free_queue(segment,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - + // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); @@ -628,6 +680,8 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -636,7 +690,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it _mi_page_reclaim(heap,page); } } @@ -666,8 +720,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld); - page->segment_in_use = true; - segment->used++; + mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { // if no more free pages, remove from the queue @@ -685,7 +738,11 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segment_enqueue(free_queue, segment); } mi_assert_internal(free_queue->first != NULL); - return mi_segment_page_alloc_in(free_queue->first,tld); + mi_page_t* page = mi_segment_page_alloc_in(free_queue->first,tld); +#if MI_DEBUG>=2 + _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; +#endif + return page; } static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -706,6 +763,9 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_ segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; +#if MI_DEBUG>=2 + _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; +#endif return page; } @@ -717,7 +777,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + page->segment_in_use = true; return page; } From 049dbf41bacbf8a839551cd3e7710ffa1925b770 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 15:44:07 -0800 Subject: [PATCH 048/104] fix commit bits for huge page allocations --- src/memory.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/memory.c b/src/memory.c index 94b6348f..214bf0d3 100644 --- a/src/memory.c +++ b/src/memory.c @@ -181,6 +181,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, 
®ion_large, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); + mi_assert_internal(!region_large || region_commit); // claim a fresh slot const uintptr_t idx = mi_atomic_increment(®ions_count); @@ -194,8 +195,8 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t* r = ®ions[idx]; r->arena_memid = arena_memid; mi_atomic_write(&r->in_use, 0); - mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); - mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); + mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); + mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); @@ -291,6 +292,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo bool any_uncommitted; mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); if (any_uncommitted) { + mi_assert_internal(!info.is_large); bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -304,6 +306,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); bool reset_zero; From 24b768363efa415f74ba25d53c6fdae55c1aa24c Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 22 Nov 2019 09:28:48 -0800 Subject: [PATCH 049/104] bump version to 1.3 for further development --- cmake/mimalloc-config-version.cmake | 2 +- ide/vs2019/mimalloc-override.vcxproj.filters | 6 ++++++ ide/vs2019/mimalloc.vcxproj.filters | 6 ++++++ include/mimalloc.h | 2 +- test/CMakeLists.txt | 2 +- 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 9d78b5a0..f64948d3 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 2) +set(mi_version_minor 3) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index bc1e4c60..b2dea4e1 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -40,6 +40,12 @@ Source Files + + Source Files + + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index b2282df3..0cce0c4f 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -43,6 +43,12 @@ Source Files + + Source Files + + + Source Files + diff --git a/include/mimalloc.h b/include/mimalloc.h index f727a990..2944de89 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 120 // major + 2 digits minor +#define MI_MALLOC_VERSION 130 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a80dde58..ed204888 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.3 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 0d3c195f376f32ba7de5124d19294a765aaf68f3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 22 Nov 2019 11:28:55 -0800 Subject: [PATCH 050/104] update stress test with more documentation --- test/test-stress.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 6b2fb8c4..b549e1b4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -5,9 +5,14 @@ terms of the MIT license. -----------------------------------------------------------------------------*/ /* This is a stress test for the allocator, using multiple threads and - transferring objects between threads. This is not a typical workload - but uses a random linear size distribution. Timing can also depend on - (random) thread scheduling. Do not use this test as a benchmark! + transferring objects between threads. It tries to reflect real-world workloads: + - allocation size is distributed linearly in powers of two + - with some fraction extra large (and some extra extra large) + - the allocations are initialized and read again at free + - pointers transfer between threads + - threads are terminated and recreated with some objects surviving in between + - uses deterministic "randomness", but execution can still depend on + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -22,13 +27,13 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 50; // scaling factor -static int ITER = 10; // N full iterations re-creating all threads +static int ITER = 10; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of N uintptr_t? +static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? 
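One way to realize the size distribution described in the comment above, i.e. sizes roughly uniform over powers of two with a small fraction of much larger objects, is sketched below with a deterministic PRNG (an illustration of the idea, not the test's actual code):

#include <stddef.h>
#include <stdint.h>

static uint64_t rng_state = 0x9E3779B97F4A7C15ULL;   // fixed seed: deterministic "randomness"

static uint64_t next_rand(void) {                    // xorshift64
  uint64_t x = rng_state;
  x ^= x << 13; x ^= x >> 7; x ^= x << 17;
  return (rng_state = x);
}

static size_t pick_alloc_size(void) {
  unsigned shift = (unsigned)(next_rand() % 14);     // 2^0 .. 2^13 machine words
  size_t words = (size_t)1 << shift;
  if (next_rand() % 100 < 2) words *= 1024;          // occasionally extra (extra) large
  return words * sizeof(uintptr_t);
}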
#ifdef USE_STD_MALLOC @@ -185,7 +190,7 @@ int main(int argc, char** argv) { long n = (strtol(argv[3], &end, 10)); if (n > 0) ITER = n; } - printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); @@ -204,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } #endif } From 4a0d35afd0714f3c8d37957d3a8b384d0591995d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 23 Nov 2019 11:59:19 -0800 Subject: [PATCH 051/104] improve secure guard page allocation to work with non-eager commit --- src/memory.c | 4 +- src/options.c | 2 +- src/segment.c | 101 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 70 insertions(+), 37 deletions(-) diff --git a/src/memory.c b/src/memory.c index 214bf0d3..b29e18f3 100644 --- a/src/memory.c +++ b/src/memory.c @@ -302,14 +302,14 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // no need to commit, but check if already fully committed *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); } - mi_assert_internal(mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); + mi_assert_internal(!*commit || mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - bool reset_zero; + bool reset_zero = false; _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); if (reset_zero) *is_zero = true; } diff --git a/src/options.c b/src/options.c index 9b6e4cd0..8975a6d3 100644 --- a/src/options.c +++ b/src/options.c @@ -69,7 +69,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index ffba8c0d..0b6501d8 100644 --- a/src/segment.c +++ b/src/segment.c @@ -123,10 +123,18 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } #endif -#if (MI_DEBUG>=3) -static size_t mi_segment_pagesize(mi_segment_t* segment) { - return ((size_t)1 << segment->page_shift); +static size_t mi_segment_page_size(mi_segment_t* segment) { + if (segment->capacity > 1) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); + return ((size_t)1 << segment->page_shift); + } + else { + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE); + return segment->segment_size; + } } + +#if (MI_DEBUG>=3) static bool mi_segment_is_valid(mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); @@ -139,11 +147,47 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || - (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + (mi_segment_page_size(segment) * segment->capacity == segment->segment_size)); return true; } #endif +/* ----------------------------------------------------------- + Guard pages +----------------------------------------------------------- */ + +static void mi_segment_protect_range(void* p, size_t size, bool protect) { + if (protect) { + _mi_mem_protect(p, size); + } + else { + _mi_mem_unprotect(p, size); + } +} + +static void mi_segment_protect(mi_segment_t* segment, bool protect) { + // add/remove guard pages + if (MI_SECURE != 0) { + // in secure mode, we set up a protected page in between the segment info and the page data + const size_t os_page_size = _mi_os_page_size(); + mi_assert_internal((segment->segment_info_size - os_page_size) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t)))); + mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_page_size == 0); + mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect); + if (MI_SECURE <= 1 || segment->capacity == 1) { + // and protect the last (or only) page too + mi_segment_protect_range((uint8_t*)segment + segment->segment_size - os_page_size, os_page_size, protect); + } + else { + // or protect every page + const size_t page_size = mi_segment_page_size(segment); + for (size_t i = 0; i < segment->capacity; i++) { + if (segment->pages[i].is_committed) { + mi_segment_protect_range((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size, protect); + } + } + } + } +} /* ----------------------------------------------------------- Page reset @@ -269,15 +313,18 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + mi_segment_protect(segment, false); // ensure no more guard pages are set } bool fully_committed = true; bool any_reset = false; for (size_t i = 0; i < segment->capacity; i++) { - const mi_page_t* page = 
&segment->pages[i]; + mi_page_t* page = &segment->pages[i]; if (!page->is_committed) fully_committed = false; - if (page->is_reset) any_reset = true; + else if (page->is_reset) { + any_reset = true; + // mi_page_unreset(segment, page, 0, tld); + } } _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -394,8 +441,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); - // TODO: should we unprotect per page? (with is_protected flag?) - _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs + mi_segment_protect(segment, false); // reset protection if the page kind differs } } } @@ -408,7 +454,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->os); + _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -419,25 +465,6 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); if (!pages_still_good) { - // guard pages - if (MI_SECURE != 0) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect((uint8_t*)segment + info_size, (pre_size - info_size)); - const size_t os_page_size = _mi_os_page_size(); - if (MI_SECURE <= 1) { - // and protect the last page too - _mi_mem_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); - } - else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size); - } - } - } - // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -465,6 +492,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + + // set protection + mi_segment_protect(segment, true); //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; @@ -525,11 +555,13 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); page->is_committed = true; bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,tld->os); - if (is_zero) page->is_zero_init = true; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + _mi_mem_commit(start,psize + gsize,&is_zero,tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
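The guard-page machinery introduced in this patch boils down to making the last OS page of a committed area inaccessible so that an overrun faults immediately instead of corrupting the next page. A POSIX-flavored sketch of that single step (illustration only; assumes the block is page-aligned and a whole multiple of the page size):

#include <sys/mman.h>
#include <unistd.h>
#include <stdbool.h>
#include <stddef.h>

static bool set_tail_guard(void* block, size_t block_size, bool protect) {
  const size_t psize = (size_t)sysconf(_SC_PAGESIZE);
  if (block_size < psize) return false;
  void* last_page = (unsigned char*)block + block_size - psize;
  int prot = (protect ? PROT_NONE : (PROT_READ | PROT_WRITE));
  return (mprotect(last_page, psize, prot) == 0);
}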
@@ -759,7 +791,7 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; + if (segment == NULL) return NULL; segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; @@ -773,7 +805,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld { mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); if (segment == NULL) return NULL; - mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); + mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned mi_page_t* page = &segment->pages[0]; @@ -800,5 +832,6 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); return page; } From 41ef691292caa2417ef7e954f8eb9db2b18d1031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 1 Sep 2019 01:06:01 -0700 Subject: [PATCH 052/104] avoid deadlock with BSD systems that call malloc from the dynamic linker extend the exception used for macOS to cover also OpenBSD (tested in 6.4+) and DragonFlyBSD (tested in 5.6.2) --- include/mimalloc-internal.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 452f0b68..2ddf3f16 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,7 +10,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__)) +#if defined(MI_MALLOC_OVERRIDE) && \ + (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) #define MI_TLS_RECURSE_GUARD #endif @@ -221,7 +222,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate static inline mi_heap_t* mi_get_default_heap(void) { #ifdef MI_TLS_RECURSE_GUARD - // on some platforms, like macOS, the dynamic loader calls `malloc` + // on some BSD platforms, like macOS, the dynamic loader calls `malloc` // to initialize thread local data. To avoid recursion, we need to avoid // accessing the thread local `_mi_default_heap` until our module is loaded // and use the statically allocated main heap until that time. 
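The recursion guard this patch extends amounts to: while the dynamic loader may still be calling malloc (that is, before the module is fully initialized), route allocations to a statically allocated heap rather than touching thread-local state. A simplified sketch of that pattern with hypothetical names (not mimalloc's actual definitions):

typedef struct heap_s { int dummy; } heap_t;

static heap_t main_heap_static;                  // statically allocated fallback heap
static __thread heap_t* default_heap = NULL;     // thread-local heap, set up lazily
static int module_loaded = 0;                    // flipped once our initializer has run

static heap_t* get_default_heap(void) {
  // early calls (e.g. from the dynamic linker on macOS, OpenBSD, or DragonFlyBSD)
  // use the static heap so thread-local initialization is never entered recursively
  if (!module_loaded || default_heap == NULL) return &main_heap_static;
  return default_heap;
}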
From 727d33b96f9d120d022a9de1bf8b0f39f7645c15 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 14:40:47 -0800 Subject: [PATCH 053/104] more precise memory reset --- src/memory.c | 16 ++++++++++------ src/segment.c | 50 +++++++++++++++++++++----------------------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/memory.c b/src/memory.c index b29e18f3..9505c98f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -306,15 +306,18 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + // some blocks are still reset mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; + if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed + bool reset_zero = false; + _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); + if (reset_zero) *is_zero = true; + } } mi_assert_internal(!mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - + #if (MI_DEBUG>=2) if (*commit) { ((uint8_t*)p)[0] = 0; } #endif @@ -409,8 +412,9 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re } // reset the blocks to reduce the working set. - if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) && - mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use only `option_page_reset` instead + if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) + && (mi_option_is_enabled(mi_option_eager_commit) || + mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); diff --git a/src/segment.c b/src/segment.c index 0b6501d8..887248b4 100644 --- a/src/segment.c +++ b/src/segment.c @@ -320,10 +320,10 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se bool any_reset = false; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->is_committed) fully_committed = false; - else if (page->is_reset) { + if (!page->is_committed) { fully_committed = false; } + if (page->is_reset) { any_reset = true; - // mi_page_unreset(segment, page, 0, tld); + if (mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false;} } } _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); @@ -419,7 +419,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind >= MI_PAGE_LARGE); + bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool pages_still_good = false; bool is_zero = false; @@ -431,18 +431,23 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } else { + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + mi_segment_protect(segment, false); // reset protection if the page kind differs + } // different page kinds; unreset any reset 
pages, and unprotect // TODO: optimize cache pop to return fitting pages if possible? for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->is_reset) { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { + page->is_reset = false; + } + else { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + } } } - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // reset protection if the page kind differs - } } } else { @@ -491,7 +496,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection mi_segment_protect(segment, true); @@ -512,18 +517,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - - // update reset memory statistics - /* - for (uint8_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - page->is_reset = false; - mi_stat_decrease( tld->stats->reset,mi_page_size(page)); - } - } - */ - + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -602,7 +596,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // reset the page memory to reduce memory pressure? // note: must come after setting `segment_in_use` to false - mi_page_reset(segment, page, used_size, tld); + mi_page_reset(segment, page, 0 /*used_size*/, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -792,9 +786,8 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); if (segment == NULL) return NULL; - segment->used = 1; - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); #if MI_DEBUG>=2 _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; #endif @@ -806,10 +799,9 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= size); - segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); return page; } From 4452431b6c66250776200b24465a01e03a393d0a Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 15:25:19 -0800 Subject: [PATCH 054/104] reenable segment cache and fix initial segment commit --- src/segment.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 887248b4..9aba8525 100644 --- a/src/segment.c +++ b/src/segment.c @@ -348,7 +348,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread + // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread size_t max_cache = mi_option_get(mi_option_segment_cache); if (tld->cache_count < max_cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache @@ -424,7 +424,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = NULL; // mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; @@ -448,6 +448,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } } } + // ensure the initial info is committed + if (segment->capacity < capacity) { + bool commit_zero = false; + _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); + if (commit_zero) is_zero = true; + } } } else { From c6df7a199c384ed0394e0e57475e6e866172b544 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 22:00:11 -0800 Subject: [PATCH 055/104] experimental eager page commit option --- include/mimalloc.h | 1 + src/options.c | 5 +++-- src/os.c | 2 +- src/page.c | 31 ++++++++++++++----------- src/segment.c | 56 ++++++++++++++++++++++++++++------------------ 5 files changed, 57 insertions(+), 38 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 2944de89..7da7cf62 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -267,6 +267,7 @@ typedef enum mi_option_e { // the following options are experimental mi_option_eager_commit, mi_option_eager_region_commit, + mi_option_eager_page_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, diff --git a/src/options.c b/src/options.c index 8975a6d3..bb6718be 100644 --- a/src/options.c +++ b/src/options.c @@ -56,18 +56,19 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled + { 0, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled #ifdef _WIN32 // and BSD? 
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) #else { 1, UNINIT, MI_OPTION(eager_region_commit) }, #endif + { 1, UNINIT, MI_OPTION(eager_page_commit) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory + { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/os.c b/src/os.c index 553d72c9..0197bafc 100644 --- a/src/os.c +++ b/src/os.c @@ -603,7 +603,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* // (but not for the reset version where we want commit to be conservative as well) static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { // page align in the range, commit liberally, decommit conservative - *is_zero = false; + if (is_zero != NULL) { *is_zero = false; } size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) diff --git a/src/page.c b/src/page.c index 31c8fd5f..2992bf09 100644 --- a/src/page.c +++ b/src/page.c @@ -35,7 +35,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); } -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); #if (MI_DEBUG>=3) @@ -242,7 +242,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); if (page == NULL) return NULL; mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, &heap->tld->stats); + mi_page_init(heap, page, block_size, heap->tld); _mi_stat_increase( &heap->tld->stats.pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); @@ -544,8 +544,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) 
-static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); @@ -555,8 +554,8 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st if (page->capacity >= page->reserved) return; size_t page_size; - _mi_page_start(_mi_page_segment(page), page, &page_size); - mi_stat_counter_increase(stats->pages_extended, 1); + uint8_t* page_start = _mi_page_start(_mi_page_segment(page), page, &page_size); + mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count size_t extend = page->reserved - page->capacity; @@ -572,16 +571,22 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); mi_assert_internal(extend < (1UL<<16)); + // commit on-demand for large and huge pages? + if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + uint8_t* start = page_start + (page->capacity * page->block_size); + _mi_mem_commit(start, extend * page->block_size, NULL, &tld->os); + } + // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, extend, stats ); + mi_page_free_list_extend(page, extend, &tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, extend, stats); + mi_page_free_list_extend_secure(heap, page, extend, &tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(stats->page_committed, extend * page->block_size); + mi_stat_increase(tld->stats.page_committed, extend * page->block_size); // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { @@ -591,7 +596,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st } // Initialize a fresh page -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_stats_t* stats) { +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert(segment != NULL); @@ -621,7 +626,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,stats); + mi_page_extend_free(heap,page,tld); mi_assert(mi_page_immediate_available(page)); } @@ -666,7 +671,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 2. 
Try to extend if (page->capacity < page->reserved) { - mi_page_extend_free(heap, page, &heap->tld->stats); + mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); break; } @@ -707,7 +712,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { if (page != NULL) { if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness - mi_page_extend_free(heap, page, &heap->tld->stats); + mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); } else { diff --git a/src/segment.c b/src/segment.c index 9aba8525..13bcf56a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -200,7 +200,12 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m void* start = mi_segment_raw_page_start(segment, page, &psize); page->is_reset = true; mi_assert_internal(size <= psize); - _mi_mem_reset(start, ((size == 0 || size > psize) ? psize : size), tld->os); + size_t reset_size = (size == 0 || size > psize ? psize : size); + if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + mi_assert_internal(page->block_size > 0); + reset_size = page->capacity * page->block_size; + } + _mi_mem_reset(start, reset_size, tld->os); } static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) @@ -210,8 +215,13 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, page->is_reset = false; size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + size_t unreset_size = (size == 0 || size > psize ? psize : size); + if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + mi_assert_internal(page->block_size > 0); + unreset_size = page->capacity * page->block_size; + } bool is_zero = false; - _mi_mem_unreset(start, ((size == 0 || size > psize) ? psize : size), &is_zero, tld->os); + _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); if (is_zero) page->is_zero_init = true; } @@ -414,8 +424,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t pre_size; size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); - size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); - + // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); @@ -554,14 +563,16 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); - size_t psize; - uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - page->is_committed = true; - bool is_zero = false; - const size_t gsize = (MI_SECURE >= 2 ? 
_mi_os_page_size() : 0); - _mi_mem_commit(start,psize + gsize,&is_zero,tld->os); - if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) { page->is_zero_init = true; } + if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { + page->is_committed = true; + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } + } } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? @@ -583,26 +594,27 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(page->is_committed); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); // calculate the used size from the raw (non-aligned) start of the page - size_t pre_size; - _mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); - size_t used_size = pre_size + (page->capacity * page->block_size); + //size_t pre_size; + //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); + //size_t used_size = pre_size + (page->capacity * page->block_size); - // zero the page data, but not the segment fields page->is_zero_init = false; - ptrdiff_t ofs = offsetof(mi_page_t,capacity); - memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; - segment->used--; // reset the page memory to reduce memory pressure? 
- // note: must come after setting `segment_in_use` to false + // note: must come after setting `segment_in_use` to false but before block_size becomes 0 mi_page_reset(segment, page, 0 /*used_size*/, tld); + + // zero the page data, but not the segment fields + ptrdiff_t ofs = offsetof(mi_page_t,capacity); + memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + segment->used--; } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -713,7 +725,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { mi_assert_internal(!page->is_reset); - mi_assert_internal(page->is_committed); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); From 1643273357ac13fbe698306776d35a9d25afcb53 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 10:11:29 -0800 Subject: [PATCH 056/104] fix unix bug in decommit size --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 0197bafc..6cf89c99 100644 --- a/src/os.c +++ b/src/os.c @@ -632,7 +632,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) - void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); if (p != start) { err = errno; } } else { From 1d998af85432bc744275df7c9723821d947e796a Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 10:47:17 -0800 Subject: [PATCH 057/104] clean up options; make secure work with eager_page_commit --- include/mimalloc.h | 6 +++--- src/options.c | 14 +++++++------- src/segment.c | 36 +++++++++++++++++++++++------------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7da7cf62..94d9edfc 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -267,19 +267,19 @@ typedef enum mi_option_e { // the following options are experimental mi_option_eager_commit, mi_option_eager_region_commit, - mi_option_eager_page_commit, + mi_option_reset_decommits, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, mi_option_segment_reset, - mi_option_reset_decommits, mi_option_eager_commit_delay, mi_option_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, - _mi_option_last + _mi_option_last, + mi_option_eager_page_commit = mi_option_eager_commit } mi_option_t; diff --git a/src/options.c b/src/options.c index bb6718be..c8df29a8 100644 --- a/src/options.c +++ b/src/options.c @@ -56,21 +56,21 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 0, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled - #ifdef _WIN32 // and BSD? - { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand + #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
+ { 0, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif - { 1, UNINIT, MI_OPTION(eager_page_commit) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 13bcf56a..f6ce939b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -165,7 +165,7 @@ static void mi_segment_protect_range(void* p, size_t size, bool protect) { } } -static void mi_segment_protect(mi_segment_t* segment, bool protect) { +static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* tld) { // add/remove guard pages if (MI_SECURE != 0) { // in secure mode, we set up a protected page in between the segment info and the page data @@ -175,7 +175,13 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect) { mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect); if (MI_SECURE <= 1 || segment->capacity == 1) { // and protect the last (or only) page too - mi_segment_protect_range((uint8_t*)segment + segment->segment_size - os_page_size, os_page_size, protect); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE); + uint8_t* start = (uint8_t*)segment + segment->segment_size - os_page_size; + if (protect && !mi_option_is_enabled(mi_option_eager_page_commit)) { + // ensure secure page is committed + _mi_mem_commit(start, os_page_size, NULL, tld); + } + mi_segment_protect_range(start, os_page_size, protect); } else { // or protect every page @@ -323,19 +329,23 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // ensure no more guard pages are set + mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - bool fully_committed = true; bool any_reset = false; + bool fully_committed = true; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->is_committed) { fully_committed = false; } - if (page->is_reset) { - any_reset = true; - if (mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false;} - } + if (page->is_reset) { any_reset = true; } } + if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { + 
fully_committed = false; + } + if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + fully_committed = false; + } + _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -442,7 +452,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, { if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // reset protection if the page kind differs + mi_segment_protect(segment, false, tld->os); // reset protection if the page kind differs } // different page kinds; unreset any reset pages, and unprotect // TODO: optimize cache pop to return fitting pages if possible? @@ -514,7 +524,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection - mi_segment_protect(segment, true); + mi_segment_protect(segment, true, tld->os); //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; @@ -563,8 +573,8 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); + page->is_committed = true; if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { - page->is_committed = true; size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -594,7 +604,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); + mi_assert_internal(page->is_committed); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -725,7 +735,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { mi_assert_internal(!page->is_reset); - mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); + mi_assert_internal(page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); From a799a191360a060afc14ca686f5803bb26448e3b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 14:30:12 -0800 Subject: [PATCH 058/104] fix non-standard line continuation --- include/mimalloc-internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bf59656c..99e4b5ba 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,8 +10,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && \ - (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) +#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) #define MI_TLS_RECURSE_GUARD #endif From a407f35c64321f02dbaf956893ced313ca7e199c Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 1 Dec 2019 00:01:14 -0800 Subject: [PATCH 059/104] add arena.c into the static override object --- src/arena.c | 52 ++++++++++++++++++++++++++-------------------------- src/static.c | 1 + 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/arena.c b/src/arena.c index 4a596b2c..90ea2b40 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,13 +7,13 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). -In contrast to the rest of mimalloc, the arenas are shared between +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. Currently arenas are only used to for huge OS page (1GiB) reservations, otherwise it delegates to direct allocation from the OS. -In the future, we can expose an API to manually add more kinds of arenas +In the future, we can expose an API to manually add more kinds of arenas which is sometimes needed for embedded devices or shared memory for example. (We can also employ this with WASI or `sbrk` systems to reserve large arenas on demand and be able to reuse them efficiently). @@ -41,7 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); /* ----------------------------------------------------------- Arena allocation @@ -82,13 +82,13 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 // Use `0` as a special id for direct OS allocated memory. #define MI_MEMID_OS 0 -static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { +static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? 
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } -static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { +static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid != MI_MEMID_OS); *arena_index = (memid & 0xFF) - 1; *bitmap_index = (memid >> 8); @@ -101,7 +101,7 @@ static size_t mi_block_count_of_size(size_t size) { /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; size_t idx = mi_atomic_read(&arena->search_idx); // start from last search @@ -120,15 +120,15 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! set the dirty bits (todo: no need for an atomic op here?) void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_memid_create(arena_index, bitmap_index); + *memid = mi_arena_id_create(arena_index, bitmap_index); *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *large = arena->is_large; if (arena->is_committed) { @@ -152,19 +152,19 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool* commit, bool* large, bool* is_zero, - size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, + bool* commit, bool* large, bool* is_zero, + size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; - + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && - size <= MI_ARENA_MAX_OBJ_SIZE && + if (alignment <= MI_SEGMENT_ALIGN && + size <= MI_ARENA_MAX_OBJ_SIZE && size >= MI_ARENA_MIN_OBJ_SIZE) { const size_t bcount = mi_block_count_of_size(size); @@ -177,7 +177,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { + { void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; @@ -224,7 +224,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { // allocated in an arena size_t arena_idx; size_t bitmap_idx; - mi_memid_indices(memid, &arena_idx, &bitmap_idx); + mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); @@ -254,7 +254,7 @@ static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); if (i >= MI_MAX_ARENAS) { mi_atomic_subu(&mi_arena_count, 1); @@ -283,10 +283,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec return ENOMEM; } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - + size_t bcount = mi_block_count_of_size(hsize); size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; - size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); + size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); @@ -294,7 +294,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } arena->block_count = bcount; arena->field_count = fields; - arena->start = (uint8_t*)p; + arena->start = (uint8_t*)p; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; @@ -308,9 +308,9 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); + mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - + mi_arena_add(arena); return 0; } @@ -326,7 +326,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; - + // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 @@ -348,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/static.c b/src/static.c index f1656fa9..d31fca8f 100644 --- a/src/static.c +++ b/src/static.c @@ -15,6 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file // functions (on Unix's). #include "stats.c" #include "os.c" +#include "arena.c" #include "memory.c" #include "segment.c" #include "page.c" From 36d168a2d9880648c697761dbc6ec90211fd7b8b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 1 Dec 2019 00:03:35 -0800 Subject: [PATCH 060/104] add preload check to options initialization --- src/options.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/options.c b/src/options.c index c8df29a8..0d3bd393 100644 --- a/src/options.c +++ b/src/options.c @@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options -// These can be accessed by multiple threads and may be +// These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. 
// -------------------------------------------------------- @@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's @@ -71,7 +71,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; @@ -89,7 +89,7 @@ void _mi_options_init(void) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } - } + } mi_max_error_count = mi_option_get(mi_option_max_errors); } @@ -98,7 +98,7 @@ long mi_option_get(mi_option_t option) { mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { - mi_option_init(desc); + mi_option_init(desc); } return desc->value; } @@ -142,7 +142,7 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg) { #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output + // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { _cputs(msg); } #else @@ -184,7 +184,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { out_buf[count] = 0; out(out_buf); if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline + out_buf[count] = '\n'; // if continue with the buffer, insert a newline } } @@ -340,7 +340,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { #include static bool mi_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } #else @@ -366,7 +366,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { } } #endif -static void mi_option_init(mi_option_desc_t* desc) { +static void mi_option_init(mi_option_desc_t* desc) { + #ifndef _WIN32 + // cannot call getenv() when still initializing the C runtime. 
+ if (_mi_preloading()) return; + #endif // Read option value from the environment char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); From f9b942d80d0d51a18bcb12959b3f8f72803a981d Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:08:46 -0800 Subject: [PATCH 061/104] fix compilation of region descriptor on 32-bit --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; From ba87a39d9fcfab97fce28c16c7e1c799ee6af524 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:07:01 -0800 Subject: [PATCH 062/104] updated random cookie generation using OS primitives and chacha20 --- CMakeLists.txt | 3 +- ide/vs2017/mimalloc-override.vcxproj | 5 +- ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 3 + ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 + include/mimalloc-internal.h | 35 ++- include/mimalloc-types.h | 11 +- src/heap.c | 14 +- src/init.c | 77 +---- src/memory.c | 2 +- src/os.c | 8 +- src/page.c | 14 +- src/random.c | 290 +++++++++++++++++++ src/static.c | 1 + 18 files changed, 378 insertions(+), 97 deletions(-) create mode 100644 src/random.c diff --git a/CMakeLists.txt b/CMakeLists.txt index c4480b89..a894de9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ include("cmake/mimalloc-config-version.cmake") set(mi_sources src/stats.c + src/random.c src/os.c src/arena.c src/memory.c @@ -115,7 +116,7 @@ endif() # extra needed libraries if(WIN32) - list(APPEND mi_libraries psapi shell32 user32) + list(APPEND mi_libraries psapi shell32 user32 bcrypt) else() list(APPEND mi_libraries pthread) find_library(LIBRT rt) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 1fc70b33..821645e9 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -129,7 +129,7 @@ Default - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) @@ -195,7 +195,7 @@ true true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) Default @@ -244,6 +244,7 @@ true + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 75a8e032..037fbcbb 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -73,5 +73,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 484c4db8..01c6ad27 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -229,6 +229,7 @@ true + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 598b8643..5fe74aa0 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -56,6 +56,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 49f3d213..6ac6541d 100644 --- 
a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -247,6 +247,7 @@ true + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index b2dea4e1..a8c5a5de 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index bae49bab..1860f26a 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -232,6 +232,7 @@ true + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 0cce0c4f..61de4afe 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -49,6 +49,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 99e4b5ba..e648c1ff 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; -// "init.c" +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c extern mi_stats_t _mi_stats_main; extern const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); -uintptr_t _mi_random_shuffle(uintptr_t x); -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); bool _mi_preloading(); // true while the C runtime is not ready // os.c @@ -100,7 +105,6 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i // "heap.c" void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); -uintptr_t _mi_heap_random(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); // "stats.c" @@ -454,6 +458,29 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + mi_assert_internal(x!=0); +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} // ------------------------------------------------------------------- // Optimize numa node access for the common case (= one node) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f79c5a64..1360c125 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -76,6 +76,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #define MI_INTPTR_SIZE (1<random; - heap->random = _mi_random_shuffle(r); - return r; -} - mi_heap_t* mi_heap_new(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); @@ -197,12 +191,16 @@ mi_heap_t* mi_heap_new(void) { memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1; - heap->random = _mi_heap_random(bheap); + _mi_random_split(&bheap->random, &heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } +uintptr_t _mi_heap_random_next(mi_heap_t* heap) { + return _mi_random_next(&heap->random); +} + // zero out the page queues static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/src/init.c b/src/init.c index d8fff823..768bc2bf 100644 --- a/src/init.c +++ b/src/init.c @@ -85,7 +85,7 @@ const mi_heap_t _mi_heap_empty = { ATOMIC_VAR_INIT(NULL), 0, 0, - 0, + { {0}, {0}, 0 }, 0, false }; @@ -116,7 +116,7 @@ mi_heap_t _mi_heap_main = { #else 0xCDCDCDCDUL, #endif - 0, // random + { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -125,66 +125,6 @@ bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. mi_stats_t _mi_stats_main = { MI_STATS_NULL }; -/* ----------------------------------------------------------- - Initialization of random numbers ------------------------------------------------------------ */ - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - -uintptr_t _mi_random_shuffle(uintptr_t x) { - #if (MI_INTPTR_SIZE==8) - // by Sebastiano Vigna, see: - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9UL; - x ^= x >> 27; - x *= 0x94d049bb133111ebUL; - x ^= x >> 31; - #elif (MI_INTPTR_SIZE==4) - // by Chris Wellons, see: - x ^= x >> 16; - x *= 0x7feb352dUL; - x ^= x >> 15; - x *= 0x846ca68bUL; - x ^= x >> 16; - #endif - return x; -} - -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { -#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse - uintptr_t x; - arc4random_buf(&x, sizeof x); -#else - // Hopefully, ASLR makes our function address random - uintptr_t x = (uintptr_t)((void*)&_mi_random_init); - x ^= seed; - // xor with high res time -#if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); -#elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); -#else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; -#endif - // and do a few randomization steps - uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { - x = _mi_random_shuffle(x); - } -#endif - return x; -} /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps @@ -214,8 +154,8 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - heap->random = _mi_random_init(heap->thread_id); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; + _mi_random_init(&heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; 
@@ -451,16 +391,15 @@ void mi_process_init(void) mi_attr_noexcept { // access _mi_heap_default before setting _mi_process_is_initialized to ensure // that the TLS slot is allocated without getting into recursion on macOS // when using dynamic linking with interpose. - mi_heap_t* h = mi_get_default_heap(); + mi_get_default_heap(); _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; - #ifndef __APPLE__ - _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; + _mi_random_init(&_mi_heap_main.random); + #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); #endif - _mi_heap_main.random = _mi_random_shuffle(random); mi_process_setup_auto_thread_done(); _mi_os_init(); #if (MI_DEBUG) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; diff --git a/src/os.c b/src/os.c index 6cf89c99..9da209ad 100644 --- a/src/os.c +++ b/src/os.c @@ -409,8 +409,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all @@ -909,8 +909,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB #endif } end = start + size; diff --git a/src/page.c b/src/page.c index 2992bf09..471dca97 100644 --- a/src/page.c +++ b/src/page.c @@ -475,11 +475,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co // and initialize the free list by randomly threading through them // set up first element - size_t current = _mi_heap_random(heap) % slice_count; + const uintptr_t r = _mi_heap_random_next(heap); + size_t current = r % slice_count; counts[current]--; mi_block_t* const free_start = blocks[current]; - // and iterate through the rest - uintptr_t rnd = heap->random; + // and iterate through the rest; use 
`random_shuffle` for performance + uintptr_t rnd = _mi_random_shuffle(r); for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; @@ -499,8 +500,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; - heap->random = _mi_random_shuffle(rnd); + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -608,7 +608,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random(heap) | 1; + page->cookie = _mi_heap_random_next(heap) | 1; #endif page->is_zero = page->is_zero_init; @@ -710,7 +710,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if (page != NULL) { - if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { + if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); diff --git a/src/random.c b/src/random.c new file mode 100644 index 00000000..063633ff --- /dev/null +++ b/src/random.c @@ -0,0 +1,290 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memset + +/* ---------------------------------------------------------------------------- +We use our own PRNG to keep predictable performance of random number generation +and to avoid implementations that use a lock. We only use the OS provided +random source to initialize the initial seeds. Since we do not need ultimate +performance but we do rely on the security (for secret cookies in secure mode) +we use a cryptographically secure generator (chacha20). +-----------------------------------------------------------------------------*/ + +#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? + + +/* ---------------------------------------------------------------------------- +Chacha20 implementation as the original algorithm with a 64-bit nonce +and counter: https://en.wikipedia.org/wiki/Salsa20 +The input matrix has sixteen 32-bit values: +Position 0 to 3: constant key +Position 4 to 11: the key +Position 12 to 13: the counter. +Position 14 to 15: the nonce. + +The implementation uses regular C code which compiles very well on modern compilers. 
+(gcc x64 has no register spills, and clang 6+ uses SSE instructions) +-----------------------------------------------------------------------------*/ + +static inline uint32_t rotl(uint32_t x, uint32_t shift) { + return (x << shift) | (x >> (32 - shift)); +} + +static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); +} + +static void chacha_block(mi_random_ctx_t* r) +{ + // scramble into `x` + uint32_t x[16]; + for (size_t i = 0; i < 16; i++) { + x[i] = r->input[i]; + } + for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { + qround(x, 0, 4, 8, 12); + qround(x, 1, 5, 9, 13); + qround(x, 2, 6, 10, 14); + qround(x, 3, 7, 11, 15); + qround(x, 0, 5, 10, 15); + qround(x, 1, 6, 11, 12); + qround(x, 2, 7, 8, 13); + qround(x, 3, 4, 9, 14); + } + + // add scrambled data to the initial state + for (size_t i = 0; i < 16; i++) { + r->output[i] = x[i] + r->input[i]; + } + r->output_available = 16; + + // increment the counter for the next round + r->input[12] += 1; + if (r->input[12] == 0) { + r->input[13] += 1; + if (r->input[13] == 0) { // and keep increasing into the nonce + r->input[14] += 1; + } + } +} + +static uint32_t chacha_next32(mi_random_ctx_t* r) { + if (r->output_available <= 0) { + chacha_block(r); + r->output_available = 16; // (assign again to suppress static analysis warning) + } + r->output_available--; + const uint32_t x = r->output[r->output_available]; + r->output[r->output_available] = 0; // reset once the data is handed out + return x; +} + +static inline uint32_t read32(const uint8_t* p, size_t idx32) { + const size_t i = 4*idx32; + return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); +} + +static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +{ + // since we only use chacha for randomness (and not encryption) we + // do not _need_ to read 32-bit values as little endian but we do anyways + // just for being compatible :-) + memset(r, 0, sizeof(*r)); + for (size_t i = 0; i < 4; i++) { + const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; + r->input[i] = read32(sigma,i); + } + for (size_t i = 0; i < 8; i++) { + r->input[i + 4] = read32(key,i); + } + r->input[12] = 0; + r->input[13] = 0; + r->input[14] = (uint32_t)nonce; + r->input[15] = (uint32_t)(nonce >> 32); +} + +static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { + memset(init, 0, sizeof(*init)); + memcpy(init->input, r->input, sizeof(init->input)); + init->input[12] = 0; + init->input[13] = 0; + init->input[14] = (uint32_t)nonce; + init->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! 
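As a quick sanity check of the `qround` above: the first test vector from RFC 7539 (the same values used in the commented-out `chacha_test` at the end of this file) can be reproduced with a short standalone C sketch; this is illustrative only and not part of the patch.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <assert.h>

// same rotation and quarter-round as in random.c above
static uint32_t rotl32(uint32_t x, uint32_t s) { return (x << s) | (x >> (32 - s)); }
static void quarter_round(uint32_t x[], size_t a, size_t b, size_t c, size_t d) {
  x[a] += x[b]; x[d] = rotl32(x[d] ^ x[a], 16);
  x[c] += x[d]; x[b] = rotl32(x[b] ^ x[c], 12);
  x[a] += x[b]; x[d] = rotl32(x[d] ^ x[a], 8);
  x[c] += x[d]; x[b] = rotl32(x[b] ^ x[c], 7);
}

int main(void) {
  // RFC 7539, section 2.1.1 (the same vector appears in chacha_test below)
  uint32_t x[4]        = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 };
  uint32_t expected[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb };
  quarter_round(x, 0, 1, 2, 3);
  for (size_t i = 0; i < 4; i++) assert(x[i] == expected[i]);
  printf("quarter-round matches the RFC 7539 test vector\n");
  return 0;
}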
+ chacha_block(init); +} + + +/* ---------------------------------------------------------------------------- +Random interface +-----------------------------------------------------------------------------*/ + +#if MI_DEBUG>1 +static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { + return (ctx != NULL && ctx->input[0] != 0); +} +#endif + +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + mi_assert_internal(ctx != new_ctx); + chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); +} + +uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + #if MI_INTPTR_SIZE <= 4 + return chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif +} + + +/* ---------------------------------------------------------------------------- +To initialize a fresh random context we rely on the OS: +- windows: BCryptGenRandom +- bsd,wasi: arc4random_buf +- linux: getrandom +If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. +-----------------------------------------------------------------------------*/ + +#if defined(_WIN32) +#pragma comment (lib,"bcrypt.lib") +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} +/* +#define SystemFunction036 NTAPI SystemFunction036 +#include +#undef SystemFunction036 +static bool os_random_buf(void* buf, size_t buf_len) { + RtlGenRandom(buf, (ULONG)buf_len); + return true; +} +*/ +#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__wasi__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} +#elif defined(__linux__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); +} +#else +static bool os_random_buf(void* buf, size_t buf_len) { + return false; +} +#endif + +#if defined(_WIN32) +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +static uintptr_t os_random_weak(uintptr_t extra_seed) { + uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random + #if defined(_WIN32) + LARGE_INTEGER pcount; + QueryPerformanceCounter(&pcount); + x ^= (uintptr_t)(pcount.QuadPart); + #elif defined(__APPLE__) + x ^= (uintptr_t)mach_absolute_time(); + #else + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + x ^= (uintptr_t)time.tv_sec; + x ^= (uintptr_t)time.tv_nsec; + #endif + // and do a few randomization steps + uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; + for (uintptr_t i = 0; i < max; i++) { + x = _mi_random_shuffle(x); + } + mi_assert_internal(x != 0); + return x; +} + +void _mi_random_init(mi_random_ctx_t* ctx) { + uint8_t key[32]; + if (!os_random_buf(key, sizeof(key))) { + // if we fail to get random data from the OS, we fall back to a + // weak random source based on the current time + uintptr_t x = os_random_weak(0); + for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. 
+ _mi_warning_message("unable to use secure randomness\n"); + x = _mi_random_shuffle(x); + ((uint32_t*)key)[i] = (uint32_t)x; + } + } + chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); +} + +/* -------------------------------------------------------- +test vectors from +----------------------------------------------------------- */ +/* +static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} +static void chacha_test(void) +{ + uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; + uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; + qround(x, 0, 1, 2, 3); + mi_assert_internal(array_equals(x, x_out, 4)); + + uint32_t y[16] = { + 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, + 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; + uint32_t y_out[16] = { + 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, + 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; + qround(y, 2, 7, 8, 13); + mi_assert_internal(array_equals(y, y_out, 16)); + + mi_random_ctx_t r = { + { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, + {0}, + 0 + }; + uint32_t r_out[16] = { + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, + 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, + 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, + 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; + chacha_block(&r); + mi_assert_internal(array_equals(r.output, r_out, 16)); +} +*/ \ No newline at end of file diff --git a/src/static.c b/src/static.c index d31fca8f..0519453e 100644 --- a/src/static.c +++ b/src/static.c @@ -14,6 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file // it will override all the standard library allocation // functions (on Unix's). 
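As a usage illustration of this single-translation-unit build: an application compiles `src/static.c` together with its own sources, after which both the explicit `mi_` API and (on Unix-like systems, per the comment above) the standard `malloc`/`free` go through mimalloc. The compile command in the comment is an assumption for illustration and is not taken from the build files.

// app.c -- sketch; assumes mimalloc's include/ is on the include path and that src/static.c
// is compiled into the program, e.g.:  cc -O2 -Iinclude app.c src/static.c -o app
#include <stdio.h>
#include <stdlib.h>
#include <mimalloc.h>

int main(void) {
  void* p = mi_malloc(64);                        // explicit mimalloc API
  printf("usable size: %zu\n", mi_usable_size(p));
  mi_free(p);
  void* q = malloc(128);                          // overridden on Unix in this build, as noted above
  free(q);
  return 0;
}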
#include "stats.c" +#include "random.c" #include "os.c" #include "arena.c" #include "memory.c" From ce02986d56cb69dd2f2d2b1a5c25260338665957 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 22:30:23 -0800 Subject: [PATCH 063/104] variable renaming --- src/random.c | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/random.c b/src/random.c index 063633ff..43e7dd5c 100644 --- a/src/random.c +++ b/src/random.c @@ -44,12 +44,12 @@ static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); } -static void chacha_block(mi_random_ctx_t* r) +static void chacha_block(mi_random_ctx_t* ctx) { // scramble into `x` uint32_t x[16]; for (size_t i = 0; i < 16; i++) { - x[i] = r->input[i]; + x[i] = ctx->input[i]; } for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { qround(x, 0, 4, 8, 12); @@ -64,28 +64,28 @@ static void chacha_block(mi_random_ctx_t* r) // add scrambled data to the initial state for (size_t i = 0; i < 16; i++) { - r->output[i] = x[i] + r->input[i]; + ctx->output[i] = x[i] + ctx->input[i]; } - r->output_available = 16; + ctx->output_available = 16; // increment the counter for the next round - r->input[12] += 1; - if (r->input[12] == 0) { - r->input[13] += 1; - if (r->input[13] == 0) { // and keep increasing into the nonce - r->input[14] += 1; + ctx->input[12] += 1; + if (ctx->input[12] == 0) { + ctx->input[13] += 1; + if (ctx->input[13] == 0) { // and keep increasing into the nonce + ctx->input[14] += 1; } } } -static uint32_t chacha_next32(mi_random_ctx_t* r) { - if (r->output_available <= 0) { - chacha_block(r); - r->output_available = 16; // (assign again to suppress static analysis warning) +static uint32_t chacha_next32(mi_random_ctx_t* ctx) { + if (ctx->output_available <= 0) { + chacha_block(ctx); + ctx->output_available = 16; // (assign again to suppress static analysis warning) } - r->output_available--; - const uint32_t x = r->output[r->output_available]; - r->output[r->output_available] = 0; // reset once the data is handed out + const uint32_t x = ctx->output[16 - ctx->output_available]; + ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out + ctx->output_available--; return x; } @@ -94,34 +94,34 @@ static inline uint32_t read32(const uint8_t* p, size_t idx32) { return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); } -static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) { // since we only use chacha for randomness (and not encryption) we // do not _need_ to read 32-bit values as little endian but we do anyways // just for being compatible :-) - memset(r, 0, sizeof(*r)); + memset(ctx, 0, sizeof(*ctx)); for (size_t i = 0; i < 4; i++) { const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; - r->input[i] = read32(sigma,i); + ctx->input[i] = read32(sigma,i); } for (size_t i = 0; i < 8; i++) { - r->input[i + 4] = read32(key,i); + ctx->input[i + 4] = read32(key,i); } - r->input[12] = 0; - r->input[13] = 0; - r->input[14] = (uint32_t)nonce; - r->input[15] = (uint32_t)(nonce >> 32); + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = (uint32_t)nonce; + ctx->input[15] = (uint32_t)(nonce >> 32); } -static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { - memset(init, 0, sizeof(*init)); - memcpy(init->input, r->input, 
sizeof(init->input)); - init->input[12] = 0; - init->input[13] = 0; - init->input[14] = (uint32_t)nonce; - init->input[15] = (uint32_t)(nonce >> 32); - mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! - chacha_block(init); +static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { + memset(ctx_new, 0, sizeof(*ctx_new)); + memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); + ctx_new->input[12] = 0; + ctx_new->input[13] = 0; + ctx_new->input[14] = (uint32_t)nonce; + ctx_new->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces! + chacha_block(ctx_new); } @@ -135,10 +135,10 @@ static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { } #endif -void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { mi_assert_internal(mi_random_is_initialized(ctx)); - mi_assert_internal(ctx != new_ctx); - chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); + mi_assert_internal(ctx != ctx_new); + chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new); } uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { From e3391d9a53c66f922c6e0ac12df4723701a05110 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 23:33:50 -0800 Subject: [PATCH 064/104] stronger encoding of free lists using two keys per page --- include/mimalloc-internal.h | 58 +++++++++++++++++++++++++------------ include/mimalloc-types.h | 17 ++++++----- src/alloc.c | 8 ++--- src/heap.c | 2 ++ src/init.c | 30 ++++++++++++------- src/page.c | 14 ++++----- src/random.c | 2 +- src/segment.c | 2 +- 8 files changed, 83 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e648c1ff..cdaac963 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -392,12 +392,28 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { } -// ------------------------------------------------------------------- -// Encoding/Decoding the free list next pointers -// Note: we pass a `null` value to be used as the `NULL` value for the -// end of a free list. This is to prevent the cookie itself to ever -// be present among user blocks (as `cookie^0==cookie`). -// ------------------------------------------------------------------- +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`, but if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. +Since these operations are not associative, the above approaches do not +work so well any more even if the `p` can be guesstimated. (We include +the rotation since xor and addition are otherwise linear in the lowest bit) +Both keys are unique per page. + +We also pass a separate `null` value to be used as `NULL` or otherwise +`rotl(k2,13)^k1` would appear (too) often as a sentinel value. 
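To make the scheme above concrete, here is a small standalone sketch (illustrative keys and addresses; a 64-bit `uintptr_t` is assumed) showing both the xor weakness described above and that `rotl(p+k2,13)^k1` decodes back to the original pointer:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

#define BITS (sizeof(uintptr_t)*8)

static uintptr_t rotl(uintptr_t x, unsigned s) { return (x << s) | (x >> (BITS - s)); }
static uintptr_t rotr(uintptr_t x, unsigned s) { return (x >> s) | (x << (BITS - s)); }

static uintptr_t encode(uintptr_t p, uintptr_t k1, uintptr_t k2) { return rotl(p + k2, 13) ^ k1; }
static uintptr_t decode(uintptr_t c, uintptr_t k1, uintptr_t k2) { return rotr(c ^ k1, 13) - k2; }

int main(void) {
  uintptr_t k1 = 0x9e3779b97f4a7c15u, k2 = 0xbf58476d1ce4e5b9u;   // example keys only
  uintptr_t p1 = 0x00007f0000010040u, p2 = 0x00007f0000010080u;   // made-up block addresses

  // plain xor with a single key: xoring two encoded values cancels the key completely
  assert(((p1 ^ k1) ^ (p2 ^ k1)) == (p1 ^ p2));

  // the rotate+add encoding still round-trips
  assert(decode(encode(p1, k1, k2), k1, k2) == p1);
  printf("encoded p1 = 0x%zx\n", (size_t)encode(p1, k1, k2));
  return 0;
}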
+------------------------------------------------------------------- */ + +#define MI_ENCODE_ROTATE_BITS (13) static inline bool mi_is_in_same_segment(const void* p, const void* q) { return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); @@ -412,49 +428,55 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) { return (idxp == idxq); } -static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) { +static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { + return ((x << shift) | (x >> (MI_INTPTR_BITS - shift))); +} +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); +} +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(block->next ^ cookie); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = (mi_encoded_t)next ^ cookie; + block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page,block,page->cookie); - // check for free list corruption: is `next` at least in our segment range? + mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]); + // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? 
- if (next!=NULL && !mi_is_in_same_page(block, next)) { + if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; } return next; #else UNUSED(page); - return mi_block_nextx(page,block,0); + return mi_block_nextx(page,block,0,0); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page,block,next, page->cookie); + mi_block_set_nextx(page,block,next, page->key[0], page->key[1]); #else UNUSED(page); - mi_block_set_nextx(page,block, next,0); + mi_block_set_nextx(page,block, next,0,0); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 1360c125..ab7d7c53 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -191,7 +191,7 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST - uintptr_t cookie; // random cookie to encode the free lists + uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) @@ -206,9 +206,9 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) - void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain + // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words + #if (MI_INTPTR_SIZE==4) + void* padding[1]; // 12/14 words on 32-bit plain #endif } mi_page_t; @@ -239,8 +239,8 @@ typedef struct mi_segment_s { size_t capacity; // count of available pages (`#free + used`) size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - + uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment @@ -289,8 +289,9 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too - uintptr_t cookie; - mi_random_ctx_t random; // random number used for secure allocation + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + uintptr_t key[2]; // twb random keys used to encode the `thread_delayed_free` list + mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. 
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages }; diff --git a/src/alloc.c b/src/alloc.c index e68b48d2..714acc76 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? { @@ -242,7 +242,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap,block,dfree, heap->cookie); + mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } @@ -266,7 +266,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly - if (mi_check_is_double_free(page, block)) return; + if (mi_unlikely(mi_check_is_double_free(page, block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -341,7 +341,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_check_is_double_free(page,block)) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; diff --git a/src/heap.c b/src/heap.c index 6d6948df..f90c4624 100644 --- a/src/heap.c +++ b/src/heap.c @@ -193,6 +193,8 @@ mi_heap_t* mi_heap_new(void) { heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } diff --git a/src/init.c b/src/init.c index 768bc2bf..cadcd2a3 100644 --- a/src/init.c +++ b/src/init.c @@ -16,13 +16,13 @@ const mi_page_t _mi_page_empty = { { 0 }, false, NULL, // free #if MI_ENCODE_FREELIST - 0, + { 0, 0 }, #endif 0, // used NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) + #if (MI_INTPTR_SIZE==4) , { NULL } // padding #endif }; @@ -83,8 +83,9 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, - 0, + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, 0, false @@ -105,18 +106,21 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; +#if MI_INTPTR_SIZE==8 +#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) +#else +#define MI_INIT_COOKIE (0xCDCDCDCDUL) +#endif + mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, NULL, - 0, // 
thread id -#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!) - 0xCDCDCDCDCDCDCDCDUL, -#else - 0xCDCDCDCDUL, -#endif - { {0}, {0}, 0 }, // random + 0, // thread id + MI_INIT_COOKIE, // initial cookie + { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -156,6 +160,8 @@ static bool _mi_heap_init(void) { heap->thread_id = _mi_thread_id(); _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; @@ -399,6 +405,8 @@ void mi_process_init(void) mi_attr_noexcept { _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); #endif mi_process_setup_auto_thread_done(); _mi_os_init(); diff --git a/src/page.c b/src/page.c index 471dca97..901fbda1 100644 --- a/src/page.c +++ b/src/page.c @@ -103,7 +103,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); @@ -284,7 +284,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); + mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -292,9 +292,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap, block, dfree, heap->cookie); + mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); - } block = next; } @@ -357,7 +356,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. 
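The `thread_delayed_free` update above is the usual lock-free list push: read the head, link the block's `next` (encoded with the heap keys), then publish with a weak CAS and retry on failure. A minimal sketch of just that pattern in plain C11 atomics, with illustrative stand-alone types rather than mimalloc's:

#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s { struct node_s* next; } node_t;

static void delayed_push(_Atomic(node_t*)* head, node_t* block) {
  node_t* old = atomic_load_explicit(head, memory_order_relaxed);
  do {
    block->next = old;  // mimalloc stores this link via mi_block_set_nextx(heap, block, old, key1, key2)
  } while (!atomic_compare_exchange_weak_explicit(head, &old, block,
                                                  memory_order_release,
                                                  memory_order_relaxed));
}

int main(void) {
  _Atomic(node_t*) head = NULL;
  node_t n1 = { NULL }, n2 = { NULL };
  delayed_push(&head, &n1);
  delayed_push(&head, &n2);
  return 0;
}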
- for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif @@ -608,7 +607,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random_next(heap) | 1; + page->key[0] = _mi_heap_random_next(heap); + page->key[1] = _mi_heap_random_next(heap); #endif page->is_zero = page->is_zero_init; @@ -621,7 +621,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->prev == NULL); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); diff --git a/src/random.c b/src/random.c index 43e7dd5c..af6cd876 100644 --- a/src/random.c +++ b/src/random.c @@ -231,9 +231,9 @@ void _mi_random_init(mi_random_ctx_t* ctx) { if (!os_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time + _mi_warning_message("unable to use secure randomness\n"); uintptr_t x = os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. - _mi_warning_message("unable to use secure randomness\n"); x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } diff --git a/src/segment.c b/src/segment.c index f6ce939b..bbe88f82 100644 --- a/src/segment.c +++ b/src/segment.c @@ -520,7 +520,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection From 77134e1ad072aa3bf3fd5e225f58ae88b48db589 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:17:49 -0800 Subject: [PATCH 065/104] update free list encoding to stronger formula with addition last --- include/mimalloc-internal.h | 29 +++++++++++++++++------------ src/page.c | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cdaac963..d41dfadc 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -397,24 +397,26 @@ Encoding/Decoding the free list next pointers This is to protect against buffer overflow exploits where the free list is mutated. Many hardened allocators xor the next pointer `p` -with a secret key `k1`, as `p^k1`, but if the attacker can guess +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). -Moreover, if multiple blocks can be read, the attacker can +Moreover, if multiple blocks can be read as well, the attacker can xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot about the pointers (and subsequently `k1`). -Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. 
+Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift))); } static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); } + static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else @@ -448,7 +453,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; + block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1; #else UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; @@ -485,7 +490,7 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // ------------------------------------------------------------------- static inline uintptr_t _mi_random_shuffle(uintptr_t x) { - mi_assert_internal(x!=0); + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros #if (MI_INTPTR_SIZE==8) // by Sebastiano Vigna, see: x ^= x >> 30; diff --git a/src/page.c b/src/page.c index 901fbda1..b070e56a 100644 --- a/src/page.c +++ b/src/page.c @@ -479,7 +479,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co counts[current]--; mi_block_t* const free_start = blocks[current]; // and iterate through the rest; use `random_shuffle` for performance - uintptr_t rnd = _mi_random_shuffle(r); + uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0 for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; From fc3e537bd4ac6d9ffec0243ec595ed15ca1649b8 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:28:13 -0800 Subject: [PATCH 066/104] improve double free detection with faster same page check --- include/mimalloc-types.h | 2 +- src/alloc.c | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ab7d7c53..76539bd6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode diff --git a/src/alloc.c b/src/alloc.c index 714acc76..82d97786 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -140,28 +140,24 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons } static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { - size_t psize; - uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); - if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) { - // Suspicious: the decoded value is in the same page (or NULL). 
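The stronger encoding from the previous patch ("update free list encoding to stronger formula with addition last") xors with `k2` first, rotates by an amount derived from `k1`, and adds `k1` last, so decoding undoes the steps in reverse order. A standalone sketch with illustrative keys; the rotation helpers guard the zero-shift case so the sketch itself has no undefined shifts:

#include <stdint.h>
#include <assert.h>

#define BITS (sizeof(uintptr_t)*8)

static uintptr_t rotl(uintptr_t x, uintptr_t s) { s %= BITS; return (s == 0 ? x : (x << s) | (x >> (BITS - s))); }
static uintptr_t rotr(uintptr_t x, uintptr_t s) { s %= BITS; return (s == 0 ? x : (x >> s) | (x << (BITS - s))); }

// encode: rotl(p ^ k2, k1) + k1      decode: rotr(c - k1, k1) ^ k2
static uintptr_t encode(uintptr_t p, uintptr_t k1, uintptr_t k2) { return rotl(p ^ k2, k1) + k1; }
static uintptr_t decode(uintptr_t c, uintptr_t k1, uintptr_t k2) { return rotr(c - k1, k1) ^ k2; }

int main(void) {
  uintptr_t k1 = 0x243f6a8885a308d3u, k2 = 0x13198a2e03707344u;  // example keys only
  uintptr_t p  = 0x00007f00deadbee0u;                            // made-up block address
  assert(decode(encode(p, k1, k2), k1, k2) == p);
  return 0;
}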
- // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) - { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); - return true; - } + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + { + _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + return true; } return false; } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { - // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free? + // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? // (continue in separate function to improve code generation) return mi_check_is_double_freex(page, block, n); } From 1b5a08cd25ee0034942df3d5f67dab2d891ba3c1 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:24:32 -0800 Subject: [PATCH 067/104] remove unused parameter in check double free --- src/segment.c | 72 +++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/segment.c b/src/segment.c index bbe88f82..676df00a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -184,7 +184,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_segment_protect_range(start, os_page_size, protect); } else { - // or protect every page + // or protect every page const size_t page_size = mi_segment_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { if (segment->pages[i].is_committed) { @@ -215,8 +215,8 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m } static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) -{ - mi_assert_internal(page->is_reset); +{ + mi_assert_internal(page->is_reset); mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; size_t psize; @@ -276,14 +276,14 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } mi_assert_internal((uintptr_t)p % block_size == 0); } - + if (page_size != NULL) *page_size = psize; mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 
/* padding */; size_t guardsize = 0; @@ -331,16 +331,16 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - + bool any_reset = false; bool fully_committed = true; for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; + mi_page_t* page = &segment->pages[i]; if (!page->is_committed) { fully_committed = false; } if (page->is_reset) { any_reset = true; } } - if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { - fully_committed = false; + if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { + fully_committed = false; } if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { fully_committed = false; @@ -366,13 +366,13 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t return segment; } -static bool mi_segment_cache_full(mi_segments_tld_t* tld) +static bool mi_segment_cache_full(mi_segments_tld_t* tld) { // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread size_t max_cache = mi_option_get(mi_option_segment_cache); if (tld->cache_count < max_cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache - ) { + ) { return false; } // take the opportunity to reduce the segment cache if it is too large (now) @@ -387,7 +387,7 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } @@ -434,21 +434,21 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t pre_size; size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); - + // Initialize parameters - bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool pages_still_good = false; bool is_zero = false; - + // Try to get it from our thread local cache first mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; } - else + else { if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); @@ -458,7 +458,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // TODO: optimize cache pop to return fitting pages if possible? 
for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { + if (page->is_reset) { if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { page->is_reset = false; } @@ -473,12 +473,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } - } + } } else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { @@ -489,12 +489,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - if (!pages_still_good) { + if (!pages_still_good) { // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -520,12 +520,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection mi_segment_protect(segment, true, tld->os); - + //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } @@ -541,8 +541,8 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -569,12 +569,12 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->segment_in_use) { // set in-use before doing unreset to prevent delayed reset page->segment_in_use = true; - segment->used++; + segment->used++; if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); page->is_committed = true; - if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { + if (segment->page_kind < MI_PAGE_LARGE || !mi_option_is_enabled(mi_option_eager_page_commit)) { size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -586,7 +586,7 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
- } + } return page; } } @@ -608,7 +608,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - + // calculate the used size from the raw (non-aligned) start of the page //size_t pre_size; //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); @@ -621,7 +621,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // note: must come after setting `segment_in_use` to false but before block_size becomes 0 mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); segment->used--; @@ -674,7 +674,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the segment from the free page queue if needed mi_segment_remove_from_free_queue(segment,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - + // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); @@ -691,7 +691,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment)); - segment->abandoned++; + segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); if (segment->used == segment->abandoned) { @@ -744,7 +744,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it _mi_page_reclaim(heap,page); } } @@ -774,7 +774,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld); - mi_assert_internal(page->segment_in_use); + mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { // if no more free pages, remove from the queue @@ -813,7 +813,7 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; + if (segment == NULL) return NULL; mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); #if MI_DEBUG>=2 From 9629a0190f5eac495936e0b0970b4343c6abb975 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:25:00 -0800 Subject: [PATCH 068/104] fix eager commit on large pages (issue #182) --- src/alloc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 82d97786..8ee78338 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -125,7 +125,7 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { // ------------------------------------------------------ -// Check 
for double free in secure and debug mode +// Check for double free in secure and debug mode // This is somewhat expensive so only enabled for secure mode 4 // ------------------------------------------------------ @@ -139,12 +139,12 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons return false; } -static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; @@ -156,11 +156,11 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? - { + { // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? // (continue in separate function to improve code generation) - return mi_check_is_double_freex(page, block, n); - } + return mi_check_is_double_freex(page, block); + } return false; } #else @@ -337,7 +337,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_unlikely(mi_check_is_double_free(page,block))) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; From f9ca88f71cbc3f43601ddedd6547f3a85c865bb5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:57:41 -0800 Subject: [PATCH 069/104] set secure default to 0 again --- include/mimalloc-types.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 76539bd6..d334489c 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode @@ -46,7 +46,7 @@ terms of the MIT license. A copy of the license can be found in the file // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1) +#if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif @@ -109,8 +109,8 @@ terms of the MIT license. 
A copy of the license can be found in the file // (Except for large pages since huge objects are allocated in 4MiB chunks) #define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16kb #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb -#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb -#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) +#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb +#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Minimal alignment necessary. On most platforms 16 bytes are needed @@ -143,14 +143,14 @@ typedef enum mi_delayed_e { } mi_delayed_t; -// The `in_full` and `has_aligned` page flags are put in a union to efficiently +// The `in_full` and `has_aligned` page flags are put in a union to efficiently // test if both are false (`full_aligned == 0`) in the `mi_free` routine. typedef union mi_page_flags_s { uint8_t full_aligned; struct { uint8_t in_full : 1; uint8_t has_aligned : 1; - } x; + } x; } mi_page_flags_t; // Thread free list. @@ -182,7 +182,7 @@ typedef struct mi_page_s { uint8_t is_reset:1; // `true` if the page memory was reset uint8_t is_committed:1; // `true` if the page virtual memory is committed uint8_t is_zero_init:1; // `true` if the page was zero initialized - + // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory @@ -194,7 +194,7 @@ typedef struct mi_page_s { uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads @@ -227,7 +227,7 @@ typedef enum mi_page_kind_e { typedef struct mi_segment_s { // memory fields size_t memid; // id for the os-level memory manager - bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields @@ -240,7 +240,7 @@ typedef struct mi_segment_s { size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` - + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment From eeb623e6af4d00d96a147a0d782298c5e4db987d Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 17:06:41 -0800 Subject: [PATCH 070/104] increase retire limit, collect retired pages --- include/mimalloc-types.h | 3 ++- src/init.c | 28 ++++++++++++--------- src/page.c | 54 +++++++++++++++++++++++++++++----------- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d334489c..68529c3f 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -187,7 +187,8 @@ typedef struct mi_page_s { uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - bool is_zero; // `true` if the blocks in the free list are zero initialized + uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST diff --git a/src/init.c b/src/init.c index cadcd2a3..3df854cf 100644 --- a/src/init.c +++ b/src/init.c @@ -12,8 +12,12 @@ terms of the MIT license. A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, 0, 0, - { 0 }, false, + 0, false, false, false, false, + 0, // capacity + 0, // reserved capacity + { 0 }, // flags + false, // is_zero + 0, // retire_expire NULL, // free #if MI_ENCODE_FREELIST { 0, 0 }, @@ -83,11 +87,11 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, // tid - 0, // cookie - { 0, 0 }, // keys + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, - 0, + 0, // page count false }; @@ -106,7 +110,7 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; -#if MI_INTPTR_SIZE==8 +#if MI_INTPTR_SIZE==8 #define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) #else #define MI_INIT_COOKIE (0xCDCDCDCDUL) @@ -121,8 +125,8 @@ mi_heap_t _mi_heap_main = { MI_INIT_COOKIE, // initial cookie { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0}, {0}, 0 }, // random - 0, // page count - false // can reclaim + 0, // page count + false // can reclaim }; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. 
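The new `retire_expire` field above is a 7-bit counter packed into the same byte as `is_zero`: `_mi_page_retire` below sets it to 2 when it keeps the last page of a size class, and `mi_page_retired_collect` counts it down and frees the page once it reaches 0 (or clears it if the page is used again). A tiny illustration of the packing, using a mirror struct rather than `mi_page_t` itself (exact layout is compiler-dependent):

#include <stdio.h>
#include <stdint.h>

// mirrors just the two bit-fields added to mi_page_t in this patch
typedef struct {
  uint8_t is_zero : 1;        // blocks in the free list are zero initialized
  uint8_t retire_expire : 7;  // expiration count for retired pages (0..127; this patch uses 0..2)
} page_bits_t;

int main(void) {
  page_bits_t b = { 0, 2 };                       // as set by _mi_page_retire below
  while (b.retire_expire > 0) b.retire_expire--;  // as counted down by mi_page_retired_collect
  printf("sizeof = %zu, retire_expire = %u\n", sizeof(page_bits_t), (unsigned)b.retire_expire);
  return 0;
}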
@@ -136,7 +140,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; + mi_tld_t tld; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -158,7 +162,7 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - _mi_random_init(&heap->random); + _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); @@ -402,7 +406,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - _mi_random_init(&_mi_heap_main.random); + _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); diff --git a/src/page.c b/src/page.c index b070e56a..f5f51a72 100644 --- a/src/page.c +++ b/src/page.c @@ -229,7 +229,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(!page->is_reset); + mi_assert_internal(!page->is_reset); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); @@ -342,7 +342,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - + #if MI_DEBUG > 1 mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); #endif @@ -392,7 +392,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); } } - + // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) mi_segments_tld_t* segments_tld = &page->heap->tld->segments; @@ -420,20 +420,40 @@ void _mi_page_retire(mi_page_t* page) { // (or we end up retiring and re-allocating most of the time) // NOTE: refine this more: we should not retire if this // is the only page left with free blocks. It is not clear - // how to check this efficiently though... + // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) { - // if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { - if (pq->last==page && pq->first==page) { + if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - return; // dont't retire after all + page->retire_expire = 2; + mi_assert_internal(mi_page_all_free(page)); + return; // dont't free after all } } _mi_page_free(page, pq, false); } +// free retired pages: we don't need to look at the entire queues +// since we only retire pages that are the last one in a queue. 
+static void mi_page_retired_collect(mi_heap_t* heap) { + for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { + mi_page_t* page = pq->first; + if (page != NULL && page->retire_expire != 0) { + if (mi_page_all_free(page)) { + page->retire_expire--; + if (page->retire_expire == 0) { + _mi_page_free(pq->first, pq, false); + } + } + else { + page->retire_expire = 0; + } + } + } +} + /* ----------------------------------------------------------- Initialize the initial free list in a page. @@ -499,7 +519,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -513,15 +533,15 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); const size_t bsize = page->block_size; mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); - + // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); mi_block_set_next(page,block,next); block = next; - } + } // prepend to free list (usually `NULL`) mi_block_set_next(page, last, page->free); page->free = start; @@ -619,6 +639,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->thread_freed == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); + mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) mi_assert_internal(page->key != 0); @@ -699,8 +720,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p } else { mi_assert(pq->first == page); + page->retire_expire = 0; } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + + // finally collect retired pages + mi_page_retired_collect(heap); return page; } @@ -719,6 +744,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { _mi_page_free_collect(page,false); } if (mi_page_immediate_available(page)) { + page->retire_expire = 0; return page; // fast path } } @@ -759,7 +785,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { // that frees the block can free the whole page and segment directly. 
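/* -----------------------------------------------------------------------------
   Stand-alone sketch (not part of this patch) of the sequential free-list
   extension done by `mi_page_free_list_extend` above: thread `extend` blocks of
   `bsize` bytes into a singly linked list and prepend the old free list at the
   end. The names (`blk_t`, `extend_free_list`) are illustrative only.
----------------------------------------------------------------------------- */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct blk_s { struct blk_s* next; } blk_t;

static blk_t* extend_free_list(uint8_t* area, size_t bsize, size_t extend, blk_t* old_free) {
  blk_t* const start = (blk_t*)area;
  blk_t* const last  = (blk_t*)(area + (extend - 1) * bsize);
  for (blk_t* b = start; b < last; b = b->next) {
    b->next = (blk_t*)((uint8_t*)b + bsize);   // link each block to the next one
  }
  last->next = old_free;                       // prepend the existing list (usually NULL)
  return start;                                // new head of the free list
}

int main(void) {
  const size_t bsize = 32, extend = 8;
  uint8_t* area = (uint8_t*)malloc(bsize * extend);
  size_t count = 0;
  for (blk_t* b = extend_free_list(area, bsize, extend, NULL); b != NULL; b = b->next) count++;
  printf("free list holds %zu blocks\n", count);   // prints 8
  free(area);
  return 0;
}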
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); + mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); @@ -777,7 +803,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { _mi_stat_increase(&heap->tld->stats.huge, block_size); _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); } - } + } return page; } From 2b108c8748410b81ca239c4f6a3639845d135587 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 21:39:18 -0800 Subject: [PATCH 071/104] increase retire expiration to 4 --- include/mimalloc-internal.h | 1 + src/heap.c | 5 +++-- src/page.c | 10 +++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d41dfadc..cfbd9782 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -90,6 +90,7 @@ void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... void _mi_heap_delayed_free(mi_heap_t* heap); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); diff --git a/src/heap.c b/src/heap.c index f90c4624..963cb982 100644 --- a/src/heap.c +++ b/src/heap.c @@ -46,7 +46,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void #if MI_DEBUG>=3 -static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { +static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); UNUSED(pq); @@ -59,7 +59,7 @@ static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page static bool mi_heap_is_valid(mi_heap_t* heap) { mi_assert_internal(heap!=NULL); - mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL); + mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); return true; } #endif @@ -84,6 +84,7 @@ typedef enum mi_collect_e { static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { UNUSED(arg2); UNUSED(heap); + mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= ABANDON); if (mi_page_all_free(page)) { diff --git a/src/page.c b/src/page.c index f5f51a72..b0b500ca 100644 --- a/src/page.c +++ b/src/page.c @@ -426,7 +426,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 2; + page->retire_expire = 4; mi_assert_internal(mi_page_all_free(page)); return; // dont't free after all } @@ -437,14 +437,14 @@ void _mi_page_retire(mi_page_t* page) { // free retired pages: we don't need to look at the entire queues // since we only retire pages that are the last one in a queue. 
-static void mi_page_retired_collect(mi_heap_t* heap) { +void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { mi_page_t* page = pq->first; if (page != NULL && page->retire_expire != 0) { if (mi_page_all_free(page)) { page->retire_expire--; - if (page->retire_expire == 0) { - _mi_page_free(pq->first, pq, false); + if (force || page->retire_expire == 0) { + _mi_page_free(pq->first, pq, force); } } else { @@ -725,7 +725,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // finally collect retired pages - mi_page_retired_collect(heap); + _mi_heap_collect_retired(heap,false); return page; } From d596f0856930a885007088ff52db8db051963da0 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 22:06:27 -0800 Subject: [PATCH 072/104] fix thread_free read in assertion --- src/alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 8ee78338..bd81aba0 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -142,9 +142,10 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). // Walk the free lists to verify positively if it is already freed + mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free)); if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, mi_tf_block(tf), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; From a2a9230ad6e404e23a724fa8c820e3533a961716 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 22:52:52 -0800 Subject: [PATCH 073/104] remove empty page removal on page search (no longer needed with retired collection and delayed freeing) --- src/page.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/page.c b/src/page.c index b0b500ca..c38d7740 100644 --- a/src/page.c +++ b/src/page.c @@ -660,9 +660,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; while( page != NULL) { @@ -674,20 +672,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. - // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. 
Try to extend @@ -707,14 +692,6 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches,count); - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } - if (page == NULL) { page = mi_page_fresh(heap, pq); } From 59fa2862941fe6c07c526d2221e2557492b3b1ab Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 4 Jan 2020 17:32:50 -0800 Subject: [PATCH 074/104] fix bug where continue would wrongly exit the do-while loop for delayed freeing --- src/page.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/page.c b/src/page.c index c38d7740..0df32f4c 100644 --- a/src/page.c +++ b/src/page.c @@ -119,23 +119,22 @@ bool _mi_page_is_valid(mi_page_t* page) { } #endif - -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { mi_thread_free_t tfree; mi_thread_free_t tfreex; - + mi_delayed_t old_delay; do { - tfreex = tfree = page->thread_free; - if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) { - tfreex = mi_tf_set_delayed(tfree,delay); - } - else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { + tfree = mi_atomic_read_relaxed(&page->thread_free); + tfreex = mi_tf_set_delayed(tfree, delay); + old_delay = mi_tf_delayed(tfree); + if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. - continue; // and try again } - } - while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + else if (delay == old_delay) { + break; // avoid atomic operation if already equal + } + } while ((old_delay == MI_DELAYED_FREEING) || + !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); } From 45582d1fb5e076a334fb9c5fd704da9b7312dc5b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 5 Jan 2020 13:58:49 -0800 Subject: [PATCH 075/104] revert a2a9230 (remove empty page removal on search): this is not generally valid when concurrent frees do not always add to thread_delayed_free. --- src/page.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 0df32f4c..78570ab0 100644 --- a/src/page.c +++ b/src/page.c @@ -659,7 +659,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order + mi_page_t* rpage = NULL; size_t count = 0; + size_t page_free_count = 0; mi_page_t* page = pq->first; while( page != NULL) { @@ -671,7 +673,20 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - break; // pick this one + // If all blocks are free, we might retire this page instead. + // do this at most 8 times to bound allocation time. 
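/* -----------------------------------------------------------------------------
   Stand-alone sketch (not part of this patch) of the C subtlety behind the
   do-while fix in patch 074 above: `continue` jumps to the while(...) test, not
   to the top of the body, so a retry written with `continue` silently exits the
   loop when that test happens to be false. Names here are illustrative only.
----------------------------------------------------------------------------- */
#include <stdbool.h>
#include <stdio.h>

static bool demo_should_retry(void) {
  return false;                 // models the CAS condition evaluating to false
}

int main(void) {
  int attempts = 0;
  bool waited = false;
  do {
    attempts++;
    if (attempts == 1) {
      continue;                 // intent: "yield and try again" -- but this goes
    }                           // straight to the while test below and exits
    waited = true;              // never reached on the first pass
  } while (demo_should_retry());
  printf("attempts=%d waited=%d\n", attempts, waited);   // attempts=1 waited=0
  return 0;
}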
+ // (note: this can happen if a page was earlier not retired due + // to having neighbours that were mostly full or due to concurrent frees) + if (page_free_count < 8 && mi_page_all_free(page)) { + page_free_count++; + if (rpage != NULL) _mi_page_free(rpage,pq,false); + rpage = page; + page = next; + continue; // and keep looking + } + else { + break; // pick this one + } } // 2. Try to extend @@ -691,6 +706,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches,count); + if (page == NULL) { + page = rpage; + rpage = NULL; + } + if (rpage != NULL) { + _mi_page_free(rpage,pq,false); + } + if (page == NULL) { page = mi_page_fresh(heap, pq); } From d8d69c2c94d0314e546f91bae8f19826aedf1e14 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 5 Jan 2020 22:07:16 -0800 Subject: [PATCH 076/104] disable MAP_NORESERVE on huge pages --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d7126e70..c9a04d27 100644 --- a/src/os.c +++ b/src/os.c @@ -331,7 +331,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); } else { - int lflags = flags; + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux int lfd = fd; #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; From 4223caac0fa95b900f89963d99f7c0d1d03a2217 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 6 Jan 2020 22:08:21 -0800 Subject: [PATCH 077/104] on Linux dynamically detect if getrandom is supported and fall back to /dev/urandom if needed --- src/random.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/random.c b/src/random.c index af6cd876..c40a96da 100644 --- a/src/random.c +++ b/src/random.c @@ -155,9 +155,9 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- To initialize a fresh random context we rely on the OS: -- windows: BCryptGenRandom -- bsd,wasi: arc4random_buf -- linux: getrandom +- Windows : BCryptGenRandom +- osX,bsd,wasi: arc4random_buf +- Linux : getrandom,/dev/urandom If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. -----------------------------------------------------------------------------*/ @@ -185,9 +185,47 @@ static bool os_random_buf(void* buf, size_t buf_len) { return true; } #elif defined(__linux__) -#include +#include +#include +#include +#include +#include +#include static bool os_random_buf(void* buf, size_t buf_len) { - return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see ) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. 
+#ifdef SYS_getrandom + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static volatile _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_read(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (ret != ENOSYS) return false; + mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom + } +#endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = open("/dev/urandom", flags, 0); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + close(fd); + return (count==buf_len); } #else static bool os_random_buf(void* buf, size_t buf_len) { From d4ab0ff08c46bb87ec666e91cecd5b2675388be2 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 7 Jan 2020 14:15:37 -0800 Subject: [PATCH 078/104] fix timeout on huge page reservation if set to 0 --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 90ea2b40..b5d41a1a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -325,7 +325,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; - const size_t timeout_per = (timeout_msecs / numa_count) + 50; + const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { From 50b3f6d7aef19abbe6a985d9be6fa0f7aeb11098 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 12:58:07 -0800 Subject: [PATCH 079/104] fix assertion --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 3d6a22f5..ee84f755 100644 --- a/src/memory.c +++ b/src/memory.c @@ -308,7 +308,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { // some blocks are still reset mi_assert_internal(!info.is_large); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed bool reset_zero = false; From 5d2f111f64a788108466e89797d6ddafde1163f4 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 12:59:20 -0800 Subject: [PATCH 080/104] make the stress test do more iterations under a smaller load to stay under 1GiB committed and increase thread interaction --- test/test-stress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index b549e1b4..924dbce1 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -26,8 +26,8 @@ terms of the MIT license. 
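/* -----------------------------------------------------------------------------
   Stand-alone sketch (not part of this patch) of why the iterations-left
   message further down in this stress-test diff needed parentheses:
   `ITER - n + 1` parses as `(ITER - n) + 1`, overstating the remaining count
   by two once iteration n has completed.
----------------------------------------------------------------------------- */
#include <stdio.h>

int main(void) {
  const int ITER = 50;
  for (int n = 0; n < 3; n++) {
    printf("after iteration %d: wrong=%d right=%d\n",
           n + 1, ITER - n + 1, ITER - (n + 1));
  }
  // prints: wrong=51 right=49, wrong=50 right=48, wrong=49 right=47
  return 0;
}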
// // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 50; // scaling factor -static int ITER = 10; // N full iterations destructing and re-creating all threads +static int SCALE = 10; // scaling factor +static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From 683d8998d4d56fbb92e447029f36d8ddbfbbf452 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 17:45:38 -0800 Subject: [PATCH 081/104] fix potential A-B-A problem with segment abandonment; noticed by Manual Poeter and Sam Gross --- include/mimalloc-types.h | 2 +- src/segment.c | 80 ++++++++++++++++++++++++++++------------ test/test-stress.c | 6 +-- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 68529c3f..da9bfbac 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -234,7 +234,7 @@ typedef struct mi_segment_s { // segment fields struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; - volatile _Atomic(struct mi_segment_s*) abandoned_next; + struct mi_segment_s* abandoned_next; size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) diff --git a/src/segment.c b/src/segment.c index 676df00a..97859fa9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -663,7 +663,28 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // are "abandoned" and will be reclaimed by other threads to // reuse their pages and/or free them eventually static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; -static volatile _Atomic(uintptr_t) abandoned_count; // = 0; +static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments + +// prepend a list of abandoned segments atomically to the global abandoned list; O(n) +static void mi_segments_prepend_abandoned(mi_segment_t* first) { + if (first == NULL) return; + + // first try if the abandoned list happens to be NULL + if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return; + + // if not, find the end of the list + mi_segment_t* last = first; + while (last->abandoned_next != NULL) { + last = last->abandoned_next; + } + + // and atomically prepend + mi_segment_t* next; + do { + next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); + last->abandoned_next = next; + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next)); +} static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); @@ -679,12 +700,9 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; - mi_segment_t* next; - do { - next = 
(mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned)); - mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next)); - mi_atomic_increment(&abandoned_count); + segment->abandoned_next = NULL; + mi_segments_prepend_abandoned(segment); // prepend one-element list + mi_atomic_increment(&abandoned_count); // keep approximate count } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -701,24 +719,35 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { } bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { - uintptr_t reclaimed = 0; - uintptr_t atmost; - if (try_all) { - atmost = abandoned_count+16; // close enough - } - else { - atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) + // To avoid the A-B-A problem, grab the entire list atomically + mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations + if (segment == NULL) return false; + segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL); + if (segment == NULL) return false; + + // we got a non-empty list + if (!try_all) { + // take at most 1/8th of the list and append the rest back to the abandoned list again + // this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem) + // and probably ok since the length will tend to be not too large. + uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated) if (atmost < 8) atmost = 8; // but at least 8 + + // find the split point + mi_segment_t* last = segment; + while (last->abandoned_next != NULL && atmost > 0) { + last = last->abandoned_next; + atmost--; + } + // split the list and push back the remaining segments + mi_segment_t* next = last->abandoned_next; + last->abandoned_next = NULL; + mi_segments_prepend_abandoned(next); } - // for `atmost` `reclaimed` abandoned segments... - while(atmost > reclaimed) { - // try to claim the head of the abandoned segments - mi_segment_t* segment; - do { - segment = (mi_segment_t*)abandoned; - } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment)); - if (segment==NULL) break; // stop early if no more segments available + // reclaim all segments that we kept + while(segment != NULL) { + mi_segment_t* const next = segment->abandoned_next; // save the next segment // got it. 
mi_atomic_decrement(&abandoned_count); @@ -754,14 +783,17 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_free(segment,false,tld); } else { - reclaimed++; // add its free pages to the the current thread free small segment queue if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { mi_segment_insert_in_free_queue(segment,tld); } } + + // go on + segment = next; } - return (reclaimed>0); + + return true; } diff --git a/test/test-stress.c b/test/test-stress.c index 924dbce1..23137b97 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -135,9 +135,9 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size * sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } - data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); + data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r); } else { // 25% retain @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } #endif } From 940df53b0afc8b114676bf3fd41b9505db2abf0d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 17:51:11 -0800 Subject: [PATCH 082/104] fix iteration count display in stress test --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 23137b97..d295f741 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From 12ef2816ed71be907647a190f4139c6639d49dde Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 19:00:03 -0800 Subject: [PATCH 083/104] fix bug exposed by commit 59fa286 where reclaimed pages could be stuck to NEVER_DELAYED --- include/mimalloc-internal.h | 2 +- src/heap.c | 4 ++-- src/page.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cfbd9782..3042e6f9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -92,7 +92,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // void _mi_heap_delayed_free(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); void _mi_deferred_free(mi_heap_t* heap, bool force); diff --git a/src/heap.c b/src/heap.c index 963cb982..5c1f8d38 100644 --- a/src/heap.c +++ b/src/heap.c @@ -103,7 +103,7 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq UNUSED(arg2); UNUSED(heap); UNUSED(pq); - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); return true; // don't break } @@ -242,7 +242,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(pq); // ensure no more thread_delayed_free will be added - 
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { diff --git a/src/page.c b/src/page.c index 78570ab0..7491bd61 100644 --- a/src/page.c +++ b/src/page.c @@ -119,7 +119,7 @@ bool _mi_page_is_valid(mi_page_t* page) { } #endif -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfree; mi_thread_free_t tfreex; mi_delayed_t old_delay; @@ -133,11 +133,13 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { else if (delay == old_delay) { break; // avoid atomic operation if already equal } + else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { + break; // leave never set + } } while ((old_delay == MI_DELAYED_FREEING) || !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); } - /* ----------------------------------------------------------- Page collect the `local_free` and `thread_free` lists ----------------------------------------------------------- */ @@ -229,9 +231,12 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); + mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); + mi_assert_internal(page->heap != NULL); + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set) mi_assert_expensive(_mi_page_is_valid(page)); } @@ -308,7 +313,7 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false); if (!mi_page_is_in_full(page)) return; mi_heap_t* heap = page->heap; @@ -324,7 +329,7 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); if (mi_page_is_in_full(page)) return; mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); From 8f75444e7a07d8a6a56302855ad1094121bd4c90 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:21:32 -0800 Subject: [PATCH 084/104] fix windows debug build at MI_DEBUG=2 --- src/heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heap.c b/src/heap.c index 5c1f8d38..4a589e5c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -45,7 +45,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>=3 +#if MI_DEBUG>=2 static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); From 403276d11e10bebb1d20c93b210258de3f02d995 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:27:18 -0800 Subject: [PATCH 085/104] build release and debug build on Windows --- azure-pipelines.yml | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 
41d67f86..5056ee34 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,16 +13,24 @@ jobs: pool: vmImage: windows-2019 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release steps: - task: CMake@1 inputs: - workingDirectory: 'build' - cmakeArgs: .. + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) - task: MSBuild@1 inputs: - solution: build/libmimalloc.sln - - upload: $(Build.SourcesDirectory)/build - artifact: windows + solution: $(BuildType)/libmimalloc.sln + - upload: $(Build.SourcesDirectory)/$(BuildType) + artifact: mimalloc-windows-$(BuildType) - job: displayName: Linux @@ -75,7 +83,7 @@ jobs: displayName: Ctest - upload: $(Build.SourcesDirectory)/$(BuildType) - artifact: ubuntu-$(BuildType) + artifact: mimalloc-ubuntu-$(BuildType) - job: displayName: macOS @@ -89,4 +97,4 @@ jobs: cmakeArgs: .. - script: make -j$(sysctl -n hw.ncpu) -C build - upload: $(Build.SourcesDirectory)/build - artifact: macos + artifact: mimalloc-macos From ce3f327f211418aaaac874a961ea92fe1fb8e013 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:40:57 -0800 Subject: [PATCH 086/104] add test pass to Windows build --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5056ee34..b9376e52 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,6 +29,7 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln + - task: CTest@1 - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From 7575b58d7ac4abe84b16c4befefdfe1618ce4347 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:46:56 -0800 Subject: [PATCH 087/104] fix test on Windows in azure pipelines --- azure-pipelines.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b9376e52..9da5ffa5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,7 +29,10 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln - - task: CTest@1 + - displayName: CTest + script: | + cd $(BuildType) + ctest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From 313d4b8ffd1bb741a3f4ab7b883b71e4913c8c5d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:47:40 -0800 Subject: [PATCH 088/104] fix test on Windows in azure pipelines --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9da5ffa5..ad5f42cb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,10 +29,10 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln - - displayName: CTest - script: | + - script: | cd $(BuildType) ctest + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From be10ebea35652e7cde14c42a8a9ab972efaafb9c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:54:56 -0800 Subject: [PATCH 089/104] build debug and secure versions on macOS in Azure pipelines --- azure-pipelines.yml | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ad5f42cb..f88b2e1a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -21,6 +21,9 @@ jobs: Release: BuildType: release cmakeExtraArgs: 
-DCMAKE_BUILD_TYPE=Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 inputs: @@ -32,7 +35,7 @@ jobs: - script: | cd $(BuildType) ctest - displayName: CTest + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) @@ -73,19 +76,15 @@ jobs: CXX: clang++ BuildType: secure-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON - steps: - task: CMake@1 inputs: workingDirectory: $(BuildType) cmakeArgs: .. $(cmakeExtraArgs) - - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: make test -C $(BuildType) - displayName: Ctest - + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-ubuntu-$(BuildType) @@ -94,11 +93,25 @@ jobs: pool: vmImage: macOS-10.14 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 inputs: - workingDirectory: 'build' - cmakeArgs: .. - - script: make -j$(sysctl -n hw.ncpu) -C build - - upload: $(Build.SourcesDirectory)/build - artifact: mimalloc-macos + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) + - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) + displayName: Make + - script: make test -C $(BuildType) + displayName: CTest + - upload: $(Build.SourcesDirectory)/$(BuildType) + artifact: mimalloc-macos-$(BuildType) From 5f61a9e89673c6a361b4b34b4db258181e8e415b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 9 Jan 2020 17:52:28 -0800 Subject: [PATCH 090/104] add mprotect error when the mmap limit might be reached in secure mode (see issue #77) --- src/os.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/os.c b/src/os.c index c9a04d27..b5bd0ad9 100644 --- a/src/os.c +++ b/src/os.c @@ -596,6 +596,18 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } +static void mi_mprotect_hint(int err) { +#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count. For example:\n" + " > sudo sysctl -w vm.max_map_count=262144\n"); + } +#else + UNUSED(err); +#endif +} + // Commit/Decommit memory. // Usuelly commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) @@ -644,6 +656,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #endif if (err != 0) { _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? 
"commit" : "decommit", start, csize, err); + mi_mprotect_hint(err); } mi_assert_internal(err == 0); return (err == 0); @@ -762,6 +775,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { #endif if (err != 0) { _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + mi_mprotect_hint(err); } return (err == 0); } From 65f4f5144bef1a7145ac95a147ac01c7751a9310 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 17:06:25 -0800 Subject: [PATCH 091/104] fix out-of-bounds error in huge OS page bitmap --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index b5d41a1a..7f1a1caf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -282,10 +282,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); + _mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages); size_t bcount = mi_block_count_of_size(hsize); - size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; + size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { @@ -300,11 +300,12 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->is_zero_init = true; arena->is_committed = true; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_inuse[bcount]; + arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap arena->blocks_committed = NULL; // the bitmaps are already zero initialized due to os_alloc // just claim leftover blocks if needed - size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + mi_assert_internal(post >= 0); if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); From 941c55ee42e1b3a14b27a1df1ceab3ebfcbcf46d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 14 Jan 2020 21:47:18 -0800 Subject: [PATCH 092/104] wip: first implementation of page free list on segments for effecient delayed page reset --- include/mimalloc-types.h | 2 + src/init.c | 7 +- src/options.c | 2 +- src/segment.c | 307 +++++++++++++++++++++++++++++++-------- 4 files changed, 251 insertions(+), 67 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index da9bfbac..51306808 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -417,6 +417,8 @@ typedef struct mi_os_tld_s { typedef struct mi_segments_tld_s { mi_segment_queue_t small_free; // queue of segments with free small pages mi_segment_queue_t medium_free; // queue of segments with free medium pages + mi_page_queue_t small_pages_free; // page queue of free small pages + mi_page_queue_t medium_pages_free; // page queue of free medium pages size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/init.c b/src/init.c index 3df854cf..085a5011 100644 --- a/src/init.c +++ b/src/init.c @@ -105,9 +105,12 @@ mi_decl_thread mi_heap_t* _mi_heap_default = 
(mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, {NULL ,NULL, 0}, + 0, 0, 0, 0, 0, 0, NULL, + tld_main_stats, tld_main_os + }, // segments { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { MI_STATS_NULL } // stats }; #if MI_INTPTR_SIZE==8 diff --git a/src/options.c b/src/options.c index 0d3bd393..77205713 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 97859fa9..fb5ea0ec 100644 --- a/src/segment.c +++ b/src/segment.c @@ -43,7 +43,7 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ ----------------------------------------------------------- */ #if (MI_DEBUG>=3) -static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { +static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_segment_t* list = queue->first; while (list != NULL) { @@ -90,7 +90,7 @@ static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi else return NULL; } -static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { +static mi_segment_queue_t* mi_segment_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { return mi_segment_free_queue_of_kind(segment->page_kind, tld); } @@ -113,7 +113,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t ----------------------------------------------------------- */ #if (MI_DEBUG>=2) -static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { +static bool mi_segment_is_in_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); if (in_queue) { @@ -123,7 +123,7 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } #endif -static size_t mi_segment_page_size(mi_segment_t* segment) { +static size_t mi_segment_page_size(const mi_segment_t* segment) { if (segment->capacity > 1) { mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); return ((size_t)1 << segment->page_shift); @@ -134,15 +134,39 @@ static size_t mi_segment_page_size(mi_segment_t* segment) { } } +static mi_page_queue_t* mi_segment_page_free_queue(mi_page_kind_t kind, mi_segments_tld_t* tld) { + if (kind==MI_PAGE_SMALL) return &tld->small_pages_free; + else if (kind==MI_PAGE_MEDIUM) return &tld->medium_pages_free; + else return 
NULL; +} + + #if (MI_DEBUG>=3) -static bool mi_segment_is_valid(mi_segment_t* segment) { +static bool mi_segment_page_free_contains(mi_page_kind_t kind, const mi_page_t* page, mi_segments_tld_t* tld) { + const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); + if (pq == NULL) return false; + mi_page_t* p = pq->first; + while (p != NULL) { + if (p == page) return true; + p = p->next; + } + return false; +} + +static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->abandoned <= segment->used); size_t nfree = 0; for (size_t i = 0; i < segment->capacity; i++) { - if (!segment->pages[i].segment_in_use) nfree++; + const mi_page_t* const page = &segment->pages[i]; + if (!page->segment_in_use) { + nfree++; + } + else { + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + } } mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 @@ -152,6 +176,20 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { } #endif +static bool mi_segment_page_free_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_kind_t kind = _mi_page_segment(page)->page_kind; + if (page->next != NULL || page->prev != NULL) { + mi_assert_internal(mi_segment_page_free_contains(kind, page, tld)); + return false; + } + if (kind > MI_PAGE_MEDIUM) return true; + // both next and prev are NULL, check for singleton list + const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); + mi_assert_internal(pq!=NULL); + return (pq->first != page && pq->last != page); +} + + /* ----------------------------------------------------------- Guard pages ----------------------------------------------------------- */ @@ -232,6 +270,102 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, } +/* ----------------------------------------------------------- + The free page queue +----------------------------------------------------------- */ + +static void mi_segment_page_free_set_expire(mi_page_t* page) { + *((intptr_t*)(&page->heap)) = _mi_clock_now() + mi_option_get(mi_option_reset_delay); +} + +static mi_msecs_t mi_segment_page_free_get_expire(mi_page_t* page) { + return *((intptr_t*)(&page->heap)); +} + +static void mi_segment_page_free_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); + mi_assert_internal(!page->segment_in_use); + mi_assert_internal(_mi_page_segment(page) == segment); + mi_assert_internal(mi_segment_page_free_not_in_queue(page,tld)); + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); + // push on top + mi_segment_page_free_set_expire(page); + page->next = pq->first; + page->prev = NULL; + if (pq->first == NULL) { + mi_assert_internal(pq->last == NULL); + pq->first = pq->last = page; + } + else { + pq->first->prev = page; + pq->first = page; + } +} + +static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + if (segment->page_kind > MI_PAGE_MEDIUM) return; + if (mi_segment_page_free_not_in_queue(page,tld)) return; + + mi_page_queue_t* pq = 
mi_segment_page_free_queue(segment->page_kind, tld); + mi_assert_internal(pq!=NULL); + mi_assert_internal(_mi_page_segment(page)==segment); + mi_assert_internal(!page->segment_in_use); + mi_assert_internal(mi_segment_page_free_contains(segment->page_kind, page, tld)); + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == pq->last) pq->last = page->prev; + if (page == pq->first) pq->first = page->next; + page->next = page->prev = NULL; + page->heap = NULL; +} + +static void mi_segment_page_free_remove_all(mi_segment_t* segment, mi_segments_tld_t* tld) { + if (segment->page_kind > MI_PAGE_MEDIUM) return; + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use) { + mi_segment_page_free_remove(segment, page, tld); + } + } +} + +static mi_page_t* mi_segment_page_free_top(mi_page_kind_t kind, mi_segments_tld_t* tld) { + mi_assert_internal(kind <= MI_PAGE_MEDIUM); + mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); + return pq->first; +} + +static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t kind, mi_segments_tld_t* tld) { + mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); + mi_assert_internal(pq != NULL); + mi_page_t* page = pq->last; + while (page != NULL && (now - mi_segment_page_free_get_expire(page)) >= 0) { + mi_page_t* const prev = page->prev; + mi_page_reset(_mi_page_segment(page), page, 0, tld); + page->heap = NULL; + page->prev = page->next = NULL; + page = prev; + } + pq->last = page; + if (page != NULL){ + page->next = NULL; + } + else { + pq->first = NULL; + } +} + +static void mi_segment_page_free_reset_delayed(mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + mi_msecs_t now = _mi_clock_now(); + mi_segment_page_free_reset_delayedx(now, MI_PAGE_SMALL, tld); + mi_segment_page_free_reset_delayedx(now, MI_PAGE_MEDIUM, tld); +} + + + + /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ @@ -407,6 +541,10 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); + mi_assert_internal(tld->small_pages_free.first == NULL); + mi_assert_internal(tld->medium_pages_free.first == NULL); + mi_assert_internal(tld->small_free.first == NULL); + mi_assert_internal(tld->medium_free.first == NULL); } @@ -532,9 +670,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); - //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); + UNUSED(force); mi_assert(segment != NULL); + mi_segment_page_free_remove_all(segment, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); @@ -561,37 +699,38 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_assert_expensive(mi_segment_is_valid(segment)); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - // set in-use before doing unreset to prevent delayed reset - 
page->segment_in_use = true; - segment->used++; - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); - mi_assert_internal(!page->is_reset); - page->is_committed = true; - if (segment->page_kind < MI_PAGE_LARGE || !mi_option_is_enabled(mi_option_eager_page_commit)) { - size_t psize; - uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - bool is_zero = false; - const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); - _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); - if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) { page->is_zero_init = true; } - } - } - if (page->is_reset) { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? - } - return page; +static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_page_segment(page) == segment); + mi_assert_internal(!page->segment_in_use); + // set in-use before doing unreset to prevent delayed reset + mi_segment_page_free_remove(segment, page, tld); + page->segment_in_use = true; + segment->used++; + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!page->is_reset); + page->is_committed = true; + if (segment->page_kind < MI_PAGE_LARGE + || !mi_option_is_enabled(mi_option_eager_page_commit)) { + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } } } - mi_assert(false); - return NULL; + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? + } + mi_assert_internal(page->segment_in_use); + mi_assert_internal(segment->used <= segment->capacity); + if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) { + // if no more free pages, remove from the queue + mi_assert_internal(!mi_segment_has_free(segment)); + mi_segment_remove_from_free_queue(segment, tld); + } } @@ -605,6 +744,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -619,19 +759,27 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // reset the page memory to reduce memory pressure? 
// note: must come after setting `segment_in_use` to false but before block_size becomes 0 - mi_page_reset(segment, page, 0 /*used_size*/, tld); + //mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields and block_size (for page size calculations) + size_t block_size = page->block_size; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + page->block_size = block_size; segment->used--; + + // add to the free page list for reuse/reset + if (segment->page_kind <= MI_PAGE_MEDIUM) { + mi_segment_page_free_add(segment, page, tld); + } } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_segment_page_free_reset_delayed(tld); // mark it as free now mi_segment_page_clear(segment, page, tld); @@ -690,10 +838,12 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the segment from the free page queue if needed - mi_segment_remove_from_free_queue(segment,tld); + mi_segment_page_free_reset_delayed(tld); + mi_segment_page_free_remove_all(segment, tld); + mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); // all pages in the segment are abandoned; add it to the abandoned list @@ -708,7 +858,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); @@ -755,7 +906,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen segment->abandoned_next = NULL; mi_segments_track_size((long)segment->segment_size,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); _mi_stat_decrease(&tld->stats->segments_abandoned,1); // add its abandoned pages to the current thread @@ -765,6 +916,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -801,30 +953,55 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen Small page allocation ----------------------------------------------------------- */ -// Allocate a small page inside a segment. 
-// Requires that the page has free pages -static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { + +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld); - mi_assert_internal(page->segment_in_use); - mi_assert_internal(segment->used <= segment->capacity); - if (segment->used == segment->capacity) { - // if no more free pages, remove from the queue - mi_assert_internal(!mi_segment_has_free(segment)); - mi_segment_remove_from_free_queue(segment,tld); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use) { + mi_segment_page_claim(segment, page, tld); + return page; + } } - return page; + mi_assert(false); + return NULL; } -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld); - if (mi_segment_queue_is_empty(free_queue)) { - mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld); - if (segment == NULL) return NULL; - mi_segment_enqueue(free_queue, segment); + +// Allocate a page inside a segment. Requires that the page has free pages +static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(mi_segment_has_free(segment)); + return mi_segment_find_free(segment, tld); +} + +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_page_t* page = NULL; + mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); + if (free_queue->first != NULL && free_queue->first->used < free_queue->first->capacity) { + // prefer to allocate from an available segment + // (to allow more chance of other segments to become completely freed) + page = mi_segment_page_alloc_in(free_queue->first, tld); } - mi_assert_internal(free_queue->first != NULL); - mi_page_t* page = mi_segment_page_alloc_in(free_queue->first,tld); + else { + // otherwise try to pop from the page free list + page = mi_segment_page_free_top(kind, tld); + if (page != NULL) { + mi_segment_page_claim(_mi_page_segment(page), page, tld); + } + else { + // if that failed, find an available segment the segment free queue again + if (mi_segment_queue_is_empty(free_queue)) { + // possibly allocate a fresh segment + mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory + mi_segment_enqueue(free_queue, segment); + } + mi_assert_internal(free_queue->first != NULL); + page = mi_segment_page_alloc_in(free_queue->first, tld); + } + } + mi_assert_internal(page != NULL); #if MI_DEBUG>=2 _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; #endif @@ -883,7 +1060,9 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); + mi_segment_page_free_reset_delayed(tld); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); return page; } From f92a2a72649b568a7d359f6b05f315c2919bc8c8 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:18:32 -0800 Subject: [PATCH 093/104] add argument pointer to the register output routine --- include/mimalloc.h | 4 ++-- src/options.c | 37 ++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 94d9edfc..08af2eb9 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -111,8 +111,8 @@ mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; -typedef void (mi_output_fun)(const char* msg); -mi_decl_export void mi_register_output(mi_output_fun* out) mi_attr_noexcept; +typedef void (mi_output_fun)(const char* msg, void* arg); +mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; diff --git a/src/options.c b/src/options.c index 0d3bd393..ed1237d1 100644 --- a/src/options.c +++ b/src/options.c @@ -140,7 +140,8 @@ void mi_option_disable(mi_option_t option) { } -static void mi_out_stderr(const char* msg) { +static void mi_out_stderr(const char* msg, void* arg) { + UNUSED(arg); #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. @@ -160,7 +161,8 @@ static void mi_out_stderr(const char* msg) { static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(uintptr_t) out_len; -static void mi_out_buf(const char* msg) { +static void mi_out_buf(const char* msg, void* arg) { + UNUSED(arg); if (msg==NULL) return; if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); @@ -175,14 +177,14 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; - out(out_buf); + out(out_buf,arg); if (!no_more_buf) { out_buf[count] = '\n'; // if continue with the buffer, insert a newline } @@ -191,9 +193,9 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { // Once this module is loaded, switch to this routine // which outputs to stderr and the delayed output buffer. -static void mi_out_buf_stderr(const char* msg) { - mi_out_stderr(msg); - mi_out_buf(msg); +static void mi_out_buf_stderr(const char* msg, void* arg) { + mi_out_stderr(msg,arg); + mi_out_buf(msg,arg); } @@ -206,21 +208,25 @@ static void mi_out_buf_stderr(const char* msg) { // For now, don't register output from multiple threads. 
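/* ----------------------------------------------------------------------------
   [Illustrative sketch, not part of the patch] How a client would use the new
   two-argument output callback registered with `mi_register_output` above.
   The callback name `my_output` and the FILE* passed as `arg` are made up for
   this example; only `mi_output_fun` and `mi_register_output(out, arg)` come
   from the patch itself.
----------------------------------------------------------------------------- */
#include <stdio.h>
#include <mimalloc.h>

static void my_output(const char* msg, void* arg) {
  FILE* log_file = (FILE*)arg;   // the pointer given at registration time
  fputs(msg, log_file);          // forward mimalloc's message unchanged
  fflush(log_file);
}

static void install_my_output(FILE* log_file) {
  // `log_file` is handed back as `arg` on every subsequent message
  mi_register_output(&my_output, log_file);
}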
#pragma warning(suppress:4180) static mi_output_fun* volatile mi_out_default; // = NULL +static volatile _Atomic(void*) mi_out_arg; // = NULL -static mi_output_fun* mi_out_get_default(void) { +static mi_output_fun* mi_out_get_default(void** parg) { + if (parg != NULL) { *parg = mi_atomic_read_ptr(&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? &mi_out_buf : out); } -void mi_register_output(mi_output_fun* out) mi_attr_noexcept { +void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now + mi_atomic_write_ptr(&mi_out_arg, arg); + if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } // add stderr to the delayed output after the module is loaded static void mi_add_stderr_output() { - mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr - mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output + mi_assert_internal(mi_out_default == NULL); + mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output } // -------------------------------------------------------- @@ -234,10 +240,11 @@ static mi_decl_thread bool recurse = false; void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { if (recurse) return; - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); + void* arg = NULL; + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(&arg); recurse = true; - if (prefix != NULL) out(prefix); - out(message); + if (prefix != NULL) out(prefix,arg); + out(message,arg); recurse = false; return; } From 0956a05bf6fc731e811a8696364caffd5b7e6da3 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:21:32 -0800 Subject: [PATCH 094/104] add argument pointer to the register deferred free callback --- include/mimalloc.h | 4 ++-- src/page.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 08af2eb9..1f6f1ef7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -108,8 +108,8 @@ mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); -mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; +typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; typedef void (mi_output_fun)(const char* msg, void* arg); mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; diff --git a/src/page.c b/src/page.c index 7491bd61..6a6e09d6 100644 --- a/src/page.c +++ b/src/page.c @@ -764,18 +764,20 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { ----------------------------------------------------------- */ static mi_deferred_free_fun* volatile deferred_free = NULL; +static volatile _Atomic(void*) deferred_arg; // = NULL void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; 
if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat); + deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(&deferred_arg)); heap->tld->recurse = false; } } -void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { +void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; + mi_atomic_write_ptr(&deferred_arg, arg); } From 783e3377f79ee82af43a0793910a9f2d01ac7863 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:53:54 -0800 Subject: [PATCH 095/104] add output argument to stat printing --- include/mimalloc-internal.h | 4 +- include/mimalloc.h | 5 +- src/init.c | 2 +- src/options.c | 27 +++--- src/stats.c | 160 ++++++++++++++++++------------------ test/main-override-static.c | 4 +- test/test-stress.c | 4 +- 7 files changed, 104 insertions(+), 102 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 3042e6f9..d5ce9f59 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -33,8 +33,8 @@ terms of the MIT license. A copy of the license can be found in the file // "options.c" -void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message); -void _mi_fprintf(mi_output_fun* out, const char* fmt, ...); +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); void _mi_error_message(const char* fmt, ...); void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); diff --git a/include/mimalloc.h b/include/mimalloc.h index 1f6f1ef7..51d96609 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -118,12 +118,13 @@ mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; -mi_decl_export void mi_stats_print(mi_output_fun* out) mi_attr_noexcept; +mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; -mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; // ------------------------------------------------------------------------------------- diff --git a/src/init.c b/src/init.c index 3df854cf..79e1e044 100644 --- a/src/init.c +++ b/src/init.c @@ -390,7 +390,7 @@ static void mi_process_load(void) { const char* msg = NULL; mi_allocator_init(&msg); if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { - _mi_fputs(NULL,NULL,msg); + _mi_fputs(NULL,NULL,NULL,msg); } } diff --git a/src/options.c b/src/options.c index ed1237d1..017b9d59 100644 --- a/src/options.c +++ b/src/options.c @@ -238,10 +238,11 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT // inside the C runtime causes another message. 
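/* ----------------------------------------------------------------------------
   [Illustrative sketch, not part of the patch] Registering a deferred-free
   callback with the extra user argument introduced in PATCH 094 above. The
   names `my_deferred_free` and `my_state_t` are hypothetical; the callback
   signature `(bool force, unsigned long long heartbeat, void* arg)` and
   `mi_register_deferred_free(fn, arg)` are taken from the patch.
----------------------------------------------------------------------------- */
#include <stdbool.h>
#include <mimalloc.h>

typedef struct my_state_s {
  unsigned long long last_heartbeat;   // example per-application bookkeeping
} my_state_t;

static void my_deferred_free(bool force, unsigned long long heartbeat, void* arg) {
  my_state_t* state = (my_state_t*)arg;   // the pointer given at registration
  state->last_heartbeat = heartbeat;
  if (force) {
    // the caller asks to release as much deferred memory as possible right now
  }
}

static void install_deferred_free(my_state_t* state) {
  mi_register_deferred_free(&my_deferred_free, state);
}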
static mi_decl_thread bool recurse = false; -void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { if (recurse) return; - void* arg = NULL; - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(&arg); + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? + out = mi_out_get_default(&arg); + } recurse = true; if (prefix != NULL) out(prefix,arg); out(message,arg); @@ -251,21 +252,21 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. -static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { +static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; - _mi_fputs(out,prefix,buf); + _mi_fputs(out,arg,prefix,buf); } -void _mi_fprintf( mi_output_fun* out, const char* fmt, ... ) { +void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { va_list args; va_start(args,fmt); - mi_vfprintf(out,NULL,fmt,args); + mi_vfprintf(out,arg,NULL,fmt,args); va_end(args); } @@ -273,7 +274,7 @@ void _mi_trace_message(const char* fmt, ...) { if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); - mi_vfprintf(NULL, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -281,7 +282,7 @@ void _mi_verbose_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_verbose)) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -290,7 +291,7 @@ void _mi_error_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); va_end(args); mi_assert(false); } @@ -300,14 +301,14 @@ void _mi_warning_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args); va_end(args); } #if MI_DEBUG void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { - _mi_fprintf(NULL,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } #endif @@ -315,7 +316,7 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) 
{ va_list args; va_start(args, fmt); - mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: fatal: ", fmt, args); va_end(args); #if (MI_SECURE>=0) abort(); diff --git a/src/stats.c b/src/stats.c index cb6d8866..57599821 100644 --- a/src/stats.c +++ b/src/stats.c @@ -126,7 +126,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { // unit > 0 : size in binary bytes // unit == 0: count as decimal // unit < 0 : count in binary -static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? " " : "b"); @@ -147,75 +147,75 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const const long frac1 = (long)(tens%10); snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); } - _mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf); + _mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf); } -static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out) { - mi_printf_amount(n,unit,out,NULL); +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + mi_printf_amount(n,unit,out,arg,NULL); } -static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out) { - if (unit==1) _mi_fprintf(out,"%11s"," "); - else mi_print_amount(n,0,out); +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + else mi_print_amount(n,0,out,arg); } -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out ) { - _mi_fprintf(out,"%10s:", msg); +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg,"%10s:", msg); if (unit>0) { - mi_print_amount(stat->peak, unit, out); - mi_print_amount(stat->allocated, unit, out); - mi_print_amount(stat->freed, unit, out); - mi_print_amount(unit, 1, out); - mi_print_count(stat->allocated, unit, out); + mi_print_amount(stat->peak, unit, out, arg); + mi_print_amount(stat->allocated, unit, out, arg); + mi_print_amount(stat->freed, unit, out, arg); + mi_print_amount(unit, 1, out, arg); + mi_print_count(stat->allocated, unit, out, arg); if (stat->allocated > stat->freed) - _mi_fprintf(out, " not all freed!\n"); + _mi_fprintf(out, arg, " not all freed!\n"); else - _mi_fprintf(out, " ok\n"); + _mi_fprintf(out, arg, " ok\n"); } else if (unit<0) { - mi_print_amount(stat->peak, -1, out); - mi_print_amount(stat->allocated, -1, out); - mi_print_amount(stat->freed, -1, out); + mi_print_amount(stat->peak, -1, out, arg); + mi_print_amount(stat->allocated, -1, out, arg); + mi_print_amount(stat->freed, -1, out, arg); if (unit==-1) { - _mi_fprintf(out, "%22s", ""); + _mi_fprintf(out, arg, "%22s", ""); } else { - mi_print_amount(-unit, 1, out); - mi_print_count((stat->allocated / -unit), 0, out); + mi_print_amount(-unit, 1, out, arg); + mi_print_count((stat->allocated / -unit), 0, out, arg); } if (stat->allocated > stat->freed) - _mi_fprintf(out, " not all freed!\n"); + _mi_fprintf(out, arg, " not all freed!\n"); else - _mi_fprintf(out, " ok\n"); + _mi_fprintf(out, arg, " ok\n"); } else { - mi_print_amount(stat->peak, 1, out); - mi_print_amount(stat->allocated, 1, out); - _mi_fprintf(out, "\n"); + mi_print_amount(stat->peak, 1, out, arg); + 
mi_print_amount(stat->allocated, 1, out, arg); + _mi_fprintf(out, arg, "\n"); } } -static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out ) { - _mi_fprintf(out, "%10s:", msg); - mi_print_amount(stat->total, -1, out); - _mi_fprintf(out, "\n"); +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->total, -1, out, arg); + _mi_fprintf(out, arg, "\n"); } -static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } -static void mi_print_header(mi_output_fun* out ) { - _mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); +static void mi_print_header(mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); } #if MI_STAT>1 -static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out) { +static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) { bool found = false; char buf[64]; for (size_t i = 0; i <= max; i++) { @@ -224,14 +224,14 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin int64_t unit = _mi_bin_size((uint8_t)i); snprintf(buf, 64, "%s %3zu", fmt, i); mi_stat_add(all, &bins[i], unit); - mi_stat_print(&bins[i], buf, unit, out); + mi_stat_print(&bins[i], buf, unit, out, arg); } } //snprintf(buf, 64, "%s all", fmt); //mi_stat_print(all, buf, 1); if (found) { - _mi_fprintf(out, "\n"); - mi_print_header(out); + _mi_fprintf(out, arg, "\n"); + mi_print_header(out, arg); } } #endif @@ -239,40 +239,40 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) mi_attr_noexcept { - mi_print_header(out); +static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_print_header(out,arg); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; - mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out); - mi_stat_print(&normal, "normal", 1, out); - mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out); - mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out); + mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out,arg); + mi_stat_print(&normal, "normal", 1, out, arg); + mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 
1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); + mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &normal, 1); mi_stat_add(&total, &stats->huge, 1); mi_stat_add(&total, &stats->giant, 1); - mi_stat_print(&total, "total", 1, out); - _mi_fprintf(out, "malloc requested: "); - mi_print_amount(stats->malloc.allocated, 1, out); - _mi_fprintf(out, "\n\n"); + mi_stat_print(&total, "total", 1, out, arg); + _mi_fprintf(out, arg, "malloc requested: "); + mi_print_amount(stats->malloc.allocated, 1, out, arg); + _mi_fprintf(out, arg, "\n\n"); #endif - mi_stat_print(&stats->reserved, "reserved", 1, out); - mi_stat_print(&stats->committed, "committed", 1, out); - mi_stat_print(&stats->reset, "reset", 1, out); - mi_stat_print(&stats->page_committed, "touched", 1, out); - mi_stat_print(&stats->segments, "segments", -1, out); - mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out); - mi_stat_print(&stats->segments_cache, "-cached", -1, out); - mi_stat_print(&stats->pages, "pages", -1, out); - mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out); - mi_stat_counter_print(&stats->pages_extended, "-extended", out); - mi_stat_counter_print(&stats->page_no_retire, "-noretire", out); - mi_stat_counter_print(&stats->mmap_calls, "mmaps", out); - mi_stat_counter_print(&stats->commit_calls, "commits", out); - mi_stat_print(&stats->threads, "threads", -1, out); - mi_stat_counter_print_avg(&stats->searches, "searches", out); - _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - if (elapsed > 0) _mi_fprintf(out, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + mi_stat_print(&stats->reserved, "reserved", 1, out, arg); + mi_stat_print(&stats->committed, "committed", 1, out, arg); + mi_stat_print(&stats->reset, "reset", 1, out, arg); + mi_stat_print(&stats->page_committed, "touched", 1, out, arg); + mi_stat_print(&stats->segments, "segments", -1, out, arg); + mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); + mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg); + mi_stat_print(&stats->pages, "pages", -1, out, arg); + mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg); + mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); + mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); + mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); + mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_print(&stats->threads, "threads", -1, out, arg); + mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); + _mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); + if (elapsed > 0) _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); mi_msecs_t user_time; mi_msecs_t sys_time; @@ -281,13 +281,13 @@ static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun size_t page_reclaim; size_t peak_commit; mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out,"%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); - mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); + _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, 
system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); + mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { - _mi_fprintf(out,", commit charge: "); - mi_printf_amount((int64_t)peak_commit, 1, out, "%s"); + _mi_fprintf(out, arg, ", commit charge: "); + mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); } - _mi_fprintf(out,"\n"); + _mi_fprintf(out, arg, "\n"); } static mi_msecs_t mi_time_start; // = 0 @@ -319,20 +319,20 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` mi_stats_merge_from(stats); } - -static void mi_stats_print_ex(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) { - mi_stats_merge_from(stats); - _mi_stats_print(&_mi_stats_main, elapsed, out); +void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + mi_stats_merge_from(mi_stats_get_default()); + _mi_stats_print(&_mi_stats_main, elapsed, out, arg); } -void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { - mi_msecs_t elapsed = _mi_clock_end(mi_time_start); - mi_stats_print_ex(mi_stats_get_default(),elapsed,out); +void mi_stats_print(void* out) mi_attr_noexcept { + // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`) + mi_stats_print_out((mi_output_fun*)out, NULL); } -void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { +void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_msecs_t elapsed = _mi_clock_end(mi_time_start); - _mi_stats_print(mi_stats_get_default(), elapsed, out); + _mi_stats_print(mi_stats_get_default(), elapsed, out, arg); } diff --git a/test/main-override-static.c b/test/main-override-static.c index b04bfeef..54a5ea66 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -13,7 +13,7 @@ static void corrupt_free(); int main() { mi_version(); - + // detect double frees and heap corruption // double_free1(); // double_free2(); @@ -106,4 +106,4 @@ static void corrupt_free() { for (int i = 0; i < 4096; i++) { malloc(SZ); } -} \ No newline at end of file +} diff --git a/test/test-stress.c b/test/test-stress.c index d295f741..42628d7c 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -119,7 +119,7 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); uintptr_t r = tid * 43; - const size_t max_item_shift = 5; // 128 + const size_t max_item_shift = 5; // 128 const size_t max_item_retained_shift = max_item_shift + 2; size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more size_t retain = allocs / 2; @@ -135,7 +135,7 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size * sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r); } From c9b5ac80b3a22a2456035651afcae1966ce6d3ee Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 12:00:44 -0800 Subject: [PATCH 096/104] update page reset queue to just do delayed page resets --- include/mimalloc-types.h | 3 +- src/init.c | 2 +- src/options.c | 2 +- src/segment.c | 192 ++++++++++++++++++--------------------- 4 files changed, 89 insertions(+), 110 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 
51306808..5d5f6dfc 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -417,8 +417,7 @@ typedef struct mi_os_tld_s { typedef struct mi_segments_tld_s { mi_segment_queue_t small_free; // queue of segments with free small pages mi_segment_queue_t medium_free; // queue of segments with free medium pages - mi_page_queue_t small_pages_free; // page queue of free small pages - mi_page_queue_t medium_pages_free; // page queue of free medium pages + mi_page_queue_t pages_reset; // queue of freed pages that can be reset size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/init.c b/src/init.c index 085a5011..debc2517 100644 --- a/src/init.c +++ b/src/init.c @@ -105,7 +105,7 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, {NULL ,NULL, 0}, + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments diff --git a/src/options.c b/src/options.c index 77205713..17e3a836 100644 --- a/src/options.c +++ b/src/options.c @@ -67,7 +67,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index fb5ea0ec..a2cd945c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -134,18 +134,10 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) { } } -static mi_page_queue_t* mi_segment_page_free_queue(mi_page_kind_t kind, mi_segments_tld_t* tld) { - if (kind==MI_PAGE_SMALL) return &tld->small_pages_free; - else if (kind==MI_PAGE_MEDIUM) return &tld->medium_pages_free; - else return NULL; -} - #if (MI_DEBUG>=3) -static bool mi_segment_page_free_contains(mi_page_kind_t kind, const mi_page_t* page, mi_segments_tld_t* tld) { - const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); - if (pq == NULL) return false; - mi_page_t* p = pq->first; +static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_t* p = tld->pages_reset.first; while (p != NULL) { if (p == page) return true; p = p->next; @@ -164,8 +156,8 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* if (!page->segment_in_use) { nfree++; } - else { - mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + if (page->segment_in_use || page->is_reset) { + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); } } mi_assert_internal(nfree + segment->used == segment->capacity); @@ -176,17 +168,15 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* } #endif -static bool mi_segment_page_free_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { - mi_page_kind_t kind = _mi_page_segment(page)->page_kind; +static bool 
mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { if (page->next != NULL || page->prev != NULL) { - mi_assert_internal(mi_segment_page_free_contains(kind, page, tld)); + mi_assert_internal(mi_pages_reset_contains(page, tld)); return false; } - if (kind > MI_PAGE_MEDIUM) return true; - // both next and prev are NULL, check for singleton list - const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); - mi_assert_internal(pq!=NULL); - return (pq->first != page && pq->last != page); + else { + // both next and prev are NULL, check for singleton list + return (tld->pages_reset.first != page && tld->pages_reset.last != page); + } } @@ -274,44 +264,57 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, The free page queue ----------------------------------------------------------- */ -static void mi_segment_page_free_set_expire(mi_page_t* page) { - *((intptr_t*)(&page->heap)) = _mi_clock_now() + mi_option_get(mi_option_reset_delay); +// we re-use the heap field for the expiration counter. Since this is a +// pointer, it can be 32-bit while the clock is always 64-bit. To guard +// against overflow, we use substraction to check for expiry which work +// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) +static void mi_page_reset_set_expire(mi_page_t* page) { + intptr_t expire = (intptr_t)(_mi_clock_now() + mi_option_get(mi_option_reset_delay)); + page->heap = (mi_heap_t*)expire; } -static mi_msecs_t mi_segment_page_free_get_expire(mi_page_t* page) { - return *((intptr_t*)(&page->heap)); +static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) { + intptr_t expire = (intptr_t)(page->heap); + return (((intptr_t)now - expire) >= 0); } -static void mi_segment_page_free_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); +static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(!page->segment_in_use); - mi_assert_internal(_mi_page_segment(page) == segment); - mi_assert_internal(mi_segment_page_free_not_in_queue(page,tld)); - mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); - mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); - // push on top - mi_segment_page_free_set_expire(page); - page->next = pq->first; - page->prev = NULL; - if (pq->first == NULL) { - mi_assert_internal(pq->last == NULL); - pq->first = pq->last = page; + mi_assert_internal(mi_page_not_in_queue(page,tld)); + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + mi_assert_internal(_mi_page_segment(page)==segment); + if (!mi_option_is_enabled(mi_option_page_reset)) return; + if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + + if (mi_option_get(mi_option_reset_delay) == 0) { + // reset immediately? 
+ mi_page_reset(segment, page, 0, tld); } else { - pq->first->prev = page; - pq->first = page; + // otherwise push on the delayed page reset queue + mi_page_queue_t* pq = &tld->pages_reset; + // push on top + mi_page_reset_set_expire(page); + page->next = pq->first; + page->prev = NULL; + if (pq->first == NULL) { + mi_assert_internal(pq->last == NULL); + pq->first = pq->last = page; + } + else { + pq->first->prev = page; + pq->first = page; + } } } -static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - if (segment->page_kind > MI_PAGE_MEDIUM) return; - if (mi_segment_page_free_not_in_queue(page,tld)) return; +static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { + if (mi_page_not_in_queue(page,tld)) return; - mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); + mi_page_queue_t* pq = &tld->pages_reset; mi_assert_internal(pq!=NULL); - mi_assert_internal(_mi_page_segment(page)==segment); mi_assert_internal(!page->segment_in_use); - mi_assert_internal(mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_internal(mi_pages_reset_contains(page, tld)); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == pq->last) pq->last = page->prev; @@ -320,33 +323,33 @@ static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, page->heap = NULL; } -static void mi_segment_page_free_remove_all(mi_segment_t* segment, mi_segments_tld_t* tld) { - if (segment->page_kind > MI_PAGE_MEDIUM) return; +static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, mi_segments_tld_t* tld) { + if (segment->mem_is_fixed) return; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - mi_segment_page_free_remove(segment, page, tld); + if (!page->segment_in_use && !page->is_reset) { + mi_pages_reset_remove(page, tld); } + else { + mi_assert_internal(mi_page_not_in_queue(page,tld)); + } } } -static mi_page_t* mi_segment_page_free_top(mi_page_kind_t kind, mi_segments_tld_t* tld) { - mi_assert_internal(kind <= MI_PAGE_MEDIUM); - mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); - return pq->first; -} - -static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t kind, mi_segments_tld_t* tld) { - mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); - mi_assert_internal(pq != NULL); +static void mi_reset_delayed(mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + mi_msecs_t now = _mi_clock_now(); + mi_page_queue_t* pq = &tld->pages_reset; + // from oldest up to the first that has not expired yet mi_page_t* page = pq->last; - while (page != NULL && (now - mi_segment_page_free_get_expire(page)) >= 0) { - mi_page_t* const prev = page->prev; + while (page != NULL && mi_page_reset_is_expired(page,now)) { + mi_page_t* const prev = page->prev; // save previous field mi_page_reset(_mi_page_segment(page), page, 0, tld); page->heap = NULL; page->prev = page->next = NULL; page = prev; } + // discard the reset pages from the queue pq->last = page; if (page != NULL){ page->next = NULL; @@ -356,12 +359,6 @@ static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t k } } -static void mi_segment_page_free_reset_delayed(mi_segments_tld_t* tld) { - if (!mi_option_is_enabled(mi_option_page_reset)) return; - mi_msecs_t now = _mi_clock_now(); - 
mi_segment_page_free_reset_delayedx(now, MI_PAGE_SMALL, tld); - mi_segment_page_free_reset_delayedx(now, MI_PAGE_MEDIUM, tld); -} @@ -541,10 +538,8 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); - mi_assert_internal(tld->small_pages_free.first == NULL); - mi_assert_internal(tld->medium_pages_free.first == NULL); - mi_assert_internal(tld->small_free.first == NULL); - mi_assert_internal(tld->medium_free.first == NULL); + mi_assert_internal(tld->pages_reset.first == NULL); + mi_assert_internal(tld->pages_reset.last == NULL); } @@ -672,7 +667,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { UNUSED(force); mi_assert(segment != NULL); - mi_segment_page_free_remove_all(segment, tld); + mi_pages_reset_remove_all_in_segment(segment, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); @@ -703,7 +698,7 @@ static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(_mi_page_segment(page) == segment); mi_assert_internal(!page->segment_in_use); // set in-use before doing unreset to prevent delayed reset - mi_segment_page_free_remove(segment, page, tld); + mi_pages_reset_remove(page, tld); page->segment_in_use = true; segment->used++; if (!page->is_committed) { @@ -744,7 +739,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); - mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_assert_internal(mi_page_not_in_queue(page, tld)); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -770,7 +765,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // add to the free page list for reuse/reset if (segment->page_kind <= MI_PAGE_MEDIUM) { - mi_segment_page_free_add(segment, page, tld); + mi_pages_reset_add(segment, page, tld); } } @@ -779,7 +774,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - mi_segment_page_free_reset_delayed(tld); + mi_reset_delayed(tld); // mark it as free now mi_segment_page_clear(segment, page, tld); @@ -841,8 +836,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the segment from the free page queue if needed - mi_segment_page_free_reset_delayed(tld); - mi_segment_page_free_remove_all(segment, tld); + mi_reset_delayed(tld); + mi_pages_reset_remove_all_in_segment(segment, tld); // do not force reset on free pages in an abandoned segment, as it is already done in segment_thread_collect mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); @@ -858,7 +853,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - 
mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); @@ -916,7 +911,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); - mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_assert_internal(mi_page_not_in_queue(page, tld)); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -957,7 +952,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment, tld)); - for (size_t i = 0; i < segment->capacity; i++) { + for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search? mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { mi_segment_page_claim(segment, page, tld); @@ -968,7 +963,6 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* return NULL; } - // Allocate a page inside a segment. Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); @@ -976,33 +970,19 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl } static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_page_t* page = NULL; + // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); - if (free_queue->first != NULL && free_queue->first->used < free_queue->first->capacity) { - // prefer to allocate from an available segment - // (to allow more chance of other segments to become completely freed) - page = mi_segment_page_alloc_in(free_queue->first, tld); - } - else { - // otherwise try to pop from the page free list - page = mi_segment_page_free_top(kind, tld); - if (page != NULL) { - mi_segment_page_claim(_mi_page_segment(page), page, tld); - } - else { - // if that failed, find an available segment the segment free queue again - if (mi_segment_queue_is_empty(free_queue)) { - // possibly allocate a fresh segment - mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); - if (segment == NULL) return NULL; // return NULL if out-of-memory - mi_segment_enqueue(free_queue, segment); - } - mi_assert_internal(free_queue->first != NULL); - page = mi_segment_page_alloc_in(free_queue->first, tld); - } + if (mi_segment_queue_is_empty(free_queue)) { + // possibly allocate a fresh segment + mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory + mi_segment_enqueue(free_queue, segment); } + mi_assert_internal(free_queue->first != NULL); + mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld); mi_assert_internal(page != NULL); #if MI_DEBUG>=2 + // verify it is committed _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; #endif return page; @@ -1062,7 +1042,7 @@ mi_page_t* 
_mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); - mi_segment_page_free_reset_delayed(tld); - mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_reset_delayed(tld); + mi_assert_internal(mi_page_not_in_queue(page, tld)); return page; } From 202246425b5c0f2f0dc68a6de9fc9fa6628d7822 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 12:16:01 -0800 Subject: [PATCH 097/104] bump version to 1.4 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- readme.md | 3 +++ test/CMakeLists.txt | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index f64948d3..0a982bdf 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 3) +set(mi_version_minor 4) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 51d96609..fe09c7f2 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 130 // major + 2 digits minor +#define MI_MALLOC_VERSION 140 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/readme.md b/readme.md index 9d3974c9..0a096b5e 100644 --- a/readme.md +++ b/readme.md @@ -56,6 +56,9 @@ Enjoy! ### Releases + +* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and stronger +free list encoding in secure mode. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. 
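/* ----------------------------------------------------------------------------
   [Illustrative sketch, not part of the patch] The delayed page reset queue in
   PATCH 096 above stores an expiration time in the pointer-sized `heap` field
   and tests expiry with a subtraction, `(intptr_t)now - expire >= 0`, so the
   check keeps working even when the 64-bit clock is truncated to a 32-bit
   field. A minimal standalone version of that encoding follows; the names
   `set_expire`/`is_expired` are made up, the arithmetic mirrors
   `mi_page_reset_set_expire`/`mi_page_reset_is_expired`.
----------------------------------------------------------------------------- */
#include <stdint.h>
#include <stdbool.h>

typedef int64_t msecs_t;                      // the clock itself is 64-bit

static intptr_t set_expire(msecs_t now, msecs_t delay) {
  return (intptr_t)(now + delay);             // may truncate on 32-bit targets
}

static bool is_expired(intptr_t expire, msecs_t now) {
  // wrap-around safe as long as `delay` stays well below 2^30 - 1 milliseconds
  // (about 12 days), as the patch comment notes
  return (((intptr_t)now - expire) >= 0);
}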
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ed204888..4862c0ec 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.3 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.4 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 0099707af905cddaab3d51a5639a1a2ae21ecf3c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 17:19:01 -0800 Subject: [PATCH 098/104] use delayed free for all pages; reduce size of the page structure for improved address calculation --- include/mimalloc-internal.h | 45 +++++++-- include/mimalloc-types.h | 57 ++++++----- src/alloc.c | 131 +++++++++++++------------ src/heap.c | 67 +++++++------ src/init.c | 11 +-- src/page-queue.c | 44 ++++----- src/page.c | 190 +++++++++++++++--------------------- src/segment.c | 19 ++-- 8 files changed, 296 insertions(+), 268 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d5ce9f59..a9391a40 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -308,7 +308,7 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - const size_t bsize = page->block_size; + const size_t bsize = page->xblock_size; mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } @@ -318,7 +318,40 @@ static inline mi_page_t* _mi_ptr_page(void* p) { return _mi_segment_page_of(_mi_ptr_segment(p), p); } +// Get the block size of a page (special cased for huge objects) +static inline size_t mi_page_block_size(const mi_page_t* page) { + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0); + if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) { + return bsize; + } + else { + size_t psize; + _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL); + return psize; + } +} + // Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); +} + +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { + return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); +} + +// Heap access +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); +} + +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); + mi_atomic_write(&page->xheap,(uintptr_t)heap); +} + +// Thread free flag helpers static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { return (mi_block_t*)(tf & ~0x03); } @@ -338,7 +371,7 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* // are all blocks in a page freed? static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); - return (page->used - page->thread_freed == 0); + return (page->used == 0); } // are there immediately available blocks @@ -349,8 +382,8 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) { // are there free blocks in this page? 
static inline bool mi_page_has_free(mi_page_t* page) { mi_assert_internal(page != NULL); - bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL)); - mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity); + bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL)); + mi_assert_internal(hasfree || page->used == page->capacity); return hasfree; } @@ -364,7 +397,7 @@ static inline bool mi_page_all_used(mi_page_t* page) { static inline bool mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; - return (page->reserved - page->used + page->thread_freed <= frac); + return (page->reserved - page->used <= frac); } static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { @@ -467,7 +500,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { - _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); + _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } return next; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index da9bfbac..bf288d60 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -124,6 +124,9 @@ terms of the MIT license. A copy of the license can be found in the file #error "define more bins" #endif +// Used as a special value to encode block sizes in 32 bits. +#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) + // The free lists use encoded next fields // (Only actually encodes when MI_ENCODED_FREELIST is defined.) typedef uintptr_t mi_encoded_t; @@ -136,10 +139,10 @@ typedef struct mi_block_s { // The delayed flags are used for efficient multi-threaded free-ing typedef enum mi_delayed_e { - MI_NO_DELAYED_FREE = 0, - MI_USE_DELAYED_FREE = 1, - MI_DELAYED_FREEING = 2, - MI_NEVER_DELAYED_FREE = 3 + MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list + MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap + MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list + MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim } mi_delayed_t; @@ -167,14 +170,28 @@ typedef uintptr_t mi_thread_free_t; // implement a monotonic heartbeat. The `thread_free` list is needed for // avoiding atomic operations in the common case. // -// `used - thread_freed` == actual blocks that are in use (alive) -// `used - thread_freed + |free| + |local_free| == capacity` // -// note: we don't count `freed` (as |free|) instead of `used` to reduce -// the number of memory accesses in the `mi_page_all_free` function(s). 
-// note: the funny layout here is due to: -// - access is optimized for `mi_free` and `mi_page_alloc` -// - using `uint16_t` does not seem to slow things down +// `used - |thread_free|` == actual blocks that are in use (alive) +// `used - |thread_free| + |free| + |local_free| == capacity` +// +// We don't count `freed` (as |free|) but use `used` to reduce +// the number of memory accesses in the `mi_page_all_free` function(s). +// +// Notes: +// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) +// - Using `uint16_t` does not seem to slow things down +// - The size is 8 words on 64-bit which helps the page index calculations +// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 +// and 12 are still good for address calculation) +// - To limit the structure size, the `xblock_size` is 32-bits only; for +// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size +// - `thread_free` uses the bottom bits as a delayed-free flags to optimize +// concurrent frees where only the first concurrent free adds to the owning +// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). +// The invariant is that no-delayed-free is only set if there is +// at least one block that will be added, or as already been added, to +// the owning heap `thread_delayed_free` list. This guarantees that pages +// will be freed correctly even if only other threads free blocks. typedef struct mi_page_s { // "owned" by the segment uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` @@ -194,23 +211,15 @@ typedef struct mi_page_s { #ifdef MI_ENCODE_FREELIST uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif - size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` - volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads - - // less accessed info - size_t block_size; // size available in each block (always `>0`) - mi_heap_t* heap; // the owning heap + volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads + volatile _Atomic(uintptr_t) xheap; + struct mi_page_s* next; // next page owned by this thread with the same `block_size` struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` - - // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words - #if (MI_INTPTR_SIZE==4) - void* padding[1]; // 12/14 words on 32-bit plain - #endif } mi_page_t; diff --git a/src/alloc.c b/src/alloc.c index bd81aba0..621fb0db 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. 
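/* Aside (illustrative sketch, not part of this patch): the new `xthread_free`
   field described above packs a block pointer together with the 2-bit
   `mi_delayed_t` flag in its low bits (blocks are at least 4-byte aligned, so
   those bits are free), which is what the `mi_tf_*` and `mi_page_thread_free*`
   helpers decode. The demo below re-creates that packing with demo_* stand-in
   names; it is not mimalloc's own code. */
#include <assert.h>
#include <stdint.h>

typedef enum demo_delayed_e {
  DEMO_USE_DELAYED_FREE   = 0,  // push on the owning heap's thread-delayed list
  DEMO_DELAYED_FREEING    = 1,  // temporary: another thread is accessing the owning heap
  DEMO_NO_DELAYED_FREE    = 2,  // push on the page-local thread-free list
  DEMO_NEVER_DELAYED_FREE = 3   // sticky, reset only on page reclaim
} demo_delayed_t;

typedef uintptr_t demo_thread_free_t;

static demo_thread_free_t demo_tf_make(void* block, demo_delayed_t delay) {
  assert(((uintptr_t)block & 0x03) == 0);            // needs at least 4-byte alignment
  return ((uintptr_t)block | (uintptr_t)delay);      // flag lives in the low two bits
}
static void*          demo_tf_block(demo_thread_free_t tf)   { return (void*)(tf & ~(uintptr_t)0x03); }
static demo_delayed_t demo_tf_delayed(demo_thread_free_t tf) { return (demo_delayed_t)(tf & 0x03); }

int main(void) {
  static long block;  // any suitably aligned object stands in for a freed block
  demo_thread_free_t tf = demo_tf_make(&block, DEMO_DELAYED_FREEING);
  assert(demo_tf_block(tf) == (void*)&block);
  assert(demo_tf_delayed(tf) == DEMO_DELAYED_FREEING);
  return 0;
}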
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { - mi_assert_internal(page->block_size==0||page->block_size >= size); + mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { return _mi_malloc_generic(heap, size); // slow path @@ -94,16 +94,16 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED(size); mi_assert_internal(p != NULL); - mi_assert_internal(size > 0 && page->block_size >= size); + mi_assert_internal(size > 0 && mi_page_block_size(page) >= size); mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p,page->block_size)); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page))); } else { // otherwise memset - memset(p, 0, page->block_size); + memset(p, 0, mi_page_block_size(page)); } } @@ -141,13 +141,12 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed - mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free)); + // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, mi_tf_block(tf), block)) + mi_list_contains(page, mi_page_thread_free(page), block)) { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + _mi_fatal_error("double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); return true; } return false; @@ -177,44 +176,50 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Free // ------------------------------------------------------ +// free huge block from another thread +static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + // huge page segments are always abandoned and can be freed immediately + mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + + // claim it and free + mi_heap_t* heap = mi_get_default_heap(); + // paranoia: if this it the last reference, the cas should always succeed + if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { + mi_block_set_next(page, block, page->free); + page->free = block; + page->used--; + page->is_zero = false; + mi_assert(page->used == 0); + mi_tld_t* tld = heap->tld; + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&tld->stats.giant, bsize); + } + else { + _mi_stat_decrease(&tld->stats.huge, bsize); + } + _mi_segment_page_free(page, true, &tld->segments); + } +} + // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { - mi_thread_free_t tfree; - mi_thread_free_t tfreex; - bool use_delayed; - + // huge page segments are always abandoned and can be freed immediately mi_segment_t* 
segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { - // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL); - // claim it and free - mi_heap_t* heap = mi_get_default_heap(); - // paranoia: if this it the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) { - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; - page->is_zero = false; - mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&tld->stats.giant, page->block_size); - } - else { - _mi_stat_decrease(&tld->stats.huge, page->block_size); - } - _mi_segment_page_free(page,true,&tld->segments); - } + mi_free_huge_block_mt(segment, page, block); return; } + mi_thread_free_t tfree; + mi_thread_free_t tfreex; + bool use_delayed; do { - tfree = page->thread_free; - use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE || - (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page - ); + tfree = mi_atomic_read_relaxed(&page->xthread_free); + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if (mi_unlikely(use_delayed)) { // unlikely: this only happens on the first concurrent free in a page that is in the full list tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); @@ -224,15 +229,11 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); - if (mi_likely(!use_delayed)) { - // increment the thread free count and return - mi_atomic_increment(&page->thread_freed); - } - else { + if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) @@ -245,10 +246,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // and reset the MI_DELAYED_FREEING flag do { - tfreex = tfree = page->thread_free; - mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); - if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free); + mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } } @@ -257,7 +258,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, page->block_size); + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif // and push it on the free list @@ -284,7 +285,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - size_t adjust = (diff % page->block_size); + size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); } @@ -329,8 +330,8 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_STAT>1) mi_heap_t* heap = mi_heap_get_default(); mi_heap_stat_decrease(heap, malloc, mi_usable_size(p)); - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1); + if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1); } // huge page stat is accounted for in `_mi_page_retire` #endif @@ -342,7 +343,9 @@ void mi_free(void* p) mi_attr_noexcept mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; - if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } + if (mi_unlikely(mi_page_all_free(page))) { + _mi_page_retire(page); + } } else { // non-local, aligned blocks, or a full page; use the more generic path @@ -356,13 +359,19 @@ bool _mi_free_delayed_block(mi_block_t* block) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_thread_id() == segment->thread_id); mi_page_t* page = _mi_segment_page_of(segment, block); - if (mi_tf_delayed(page->thread_free) == MI_DELAYED_FREEING) { - // we might already start delayed freeing while another thread has not yet - // reset the delayed_freeing flag; in that case don't free it quite yet if - // this is the last block remaining. - if (page->used - page->thread_freed == 1) return false; - } - _mi_free_block(page,true,block); + + // Clear the no-delayed flag so delayed freeing is used again for this page. + // This must be done before collecting the free lists on this page -- otherwise + // some blocks may end up in the page `thread_free` list with no blocks in the + // heap `thread_delayed_free` list which may cause the page to be never freed! 
+ // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */); + + // collect all other non-local frees to ensure up-to-date `used` count + _mi_page_free_collect(page, false); + + // and free the block (possibly freeing the page as well since used is updated) + _mi_free_block(page, true, block); return true; } @@ -371,7 +380,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* segment = _mi_ptr_segment(p); const mi_page_t* page = _mi_segment_page_of(segment,p); - size_t size = page->block_size; + size_t size = mi_page_block_size(page); if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/heap.c b/src/heap.c index 4a589e5c..9f2a4457 100644 --- a/src/heap.c +++ b/src/heap.c @@ -34,7 +34,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); count++; if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue @@ -50,7 +50,7 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(arg1); UNUSED(arg2); UNUSED(pq); - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->thread_id == heap->thread_id); mi_assert_expensive(_mi_page_is_valid(page)); @@ -118,13 +118,18 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // this may free some segments (but also take ownership of abandoned pages) _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); } - #if MI_DEBUG - else if (collect == ABANDON && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + else if ( + #ifdef NDEBUG + collect == FORCE + #else + collect >= FORCE + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap)) + { // the main thread is abandoned, try to free all abandoned segments. // if all memory is freed by now, all segments should be freed. 
_mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); } - #endif } // if abandoning, mark all pages to no longer add to delayed_free @@ -245,25 +250,27 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_LARGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.giant, bsize); } else { - _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); + _mi_stat_decrease(&heap->tld->stats.huge, bsize); } } - #if (MI_STAT>1) - size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); +#if (MI_STAT>1) + _mi_page_free_collect(page, false); // update used count + const size_t inuse = page->used; + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], inuse); } - mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... - #endif + mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... +#endif - // pretend it is all free now - mi_assert_internal(page->thread_freed<=0xFFFF); - page->used = (uint16_t)page->thread_freed; + /// pretend it is all free now + mi_assert_internal(mi_page_thread_free(page) == NULL); + page->used = 0; // and free the page _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); @@ -374,7 +381,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); if (mi_unlikely(!valid)) return NULL; - return _mi_segment_page_of(segment,p)->heap; + return mi_page_heap(_mi_segment_page_of(segment,p)); } bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { @@ -390,7 +397,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa bool* found = (bool*)vfound; mi_segment_t* segment = _mi_page_segment(page); void* start = _mi_page_start(segment, page, NULL); - void* end = (uint8_t*)start + (page->capacity * page->block_size); + void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); *found = (p >= start && p < end); return (!*found); // continue if not found } @@ -432,13 +439,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal(page->local_free == NULL); if (page->used == 0) return true; + const size_t bsize = mi_page_block_size(page); size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(page->heap, area, pstart, page->block_size, arg); + return visitor(mi_page_heap(page), area, pstart, bsize, arg); } // create a bitmap of free blocks. @@ -451,8 +459,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v free_count++; mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; - mi_assert_internal(offset % page->block_size == 0); - size_t blockidx = offset / page->block_size; // Todo: avoid division? 
+ mi_assert_internal(offset % bsize == 0); + size_t blockidx = offset / bsize; // Todo: avoid division? mi_assert_internal( blockidx < MI_MAX_BLOCKS); size_t bitidx = (blockidx / sizeof(uintptr_t)); size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); @@ -471,8 +479,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v } else if ((m & ((uintptr_t)1 << bit)) == 0) { used_count++; - uint8_t* block = pstart + (i * page->block_size); - if (!visitor(page->heap, area, block, page->block_size, arg)) return false; + uint8_t* block = pstart + (i * bsize); + if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false; } } mi_assert_internal(page->used == used_count); @@ -487,12 +495,13 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; + const size_t bsize = mi_page_block_size(page); xarea.page = page; - xarea.area.reserved = page->reserved * page->block_size; - xarea.area.committed = page->capacity * page->block_size; + xarea.area.reserved = page->reserved * bsize; + xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used - page->thread_freed; // race is ok - xarea.area.block_size = page->block_size; + xarea.area.used = page->used; + xarea.area.block_size = bsize; return fun(heap, &xarea, arg); } diff --git a/src/init.c b/src/init.c index 79e1e044..d81d7459 100644 --- a/src/init.c +++ b/src/init.c @@ -23,12 +23,11 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, #endif 0, // used - NULL, - ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), - 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) - , { NULL } // padding - #endif + 0, // xblock_size + NULL, // local_free + ATOMIC_VAR_INIT(0), // xthread_free + ATOMIC_VAR_INIT(0), // xheap + NULL, NULL }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) diff --git a/src/page-queue.c b/src/page-queue.c index 95443a69..68e2aaa4 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -178,20 +178,20 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* #endif static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); - mi_heap_t* heap = page->heap; + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(bin >= MI_BIN_HUGE || page->block_size == pq->block_size); + mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); mi_assert_expensive(mi_page_queue_contains(pq, page)); return pq; } static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); + uint8_t bin = (mi_page_is_in_full(page) ? 
MI_BIN_FULL : _mi_bin(page->xblock_size)); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size); + mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); return pq; } @@ -246,35 +246,35 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; if (page == queue->first) { queue->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, queue)); mi_heap_queue_first_update(heap,queue); } - page->heap->page_count--; + heap->page_count--; page->next = NULL; page->prev = NULL; - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(page->block_size == queue->block_size || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || + mi_assert_internal(page->xblock_size == queue->block_size || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -296,19 +296,19 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || - (page->block_size == to->block_size && mi_page_queue_is_full(from)) || - (page->block_size == from->block_size && mi_page_queue_is_full(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || + (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || + (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->xblock_size > 
MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == from->last) from->last = page->prev; if (page == from->first) { from->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, from)); mi_heap_queue_first_update(heap, from); } @@ -316,14 +316,14 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro page->prev = to->last; page->next = NULL; if (to->last != NULL) { - mi_assert_internal(page->heap == to->last->heap); + mi_assert_internal(heap == mi_page_heap(to->last)); to->last->next = page; to->last = page; } else { to->first = page; to->last = page; - mi_heap_queue_first_update(page->heap, to); + mi_heap_queue_first_update(heap, to); } mi_page_set_in_full(page, mi_page_queue_is_full(to)); @@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + mi_page_set_heap(page,heap); count++; } diff --git a/src/page.c b/src/page.c index 6a6e09d6..40aec0c6 100644 --- a/src/page.c +++ b/src/page.c @@ -29,10 +29,11 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ // Index a block in a page -static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t i) { +static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { + UNUSED(page); mi_assert_internal(page != NULL); mi_assert_internal(i <= page->reserved); - return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); + return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); @@ -69,13 +70,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { } static bool mi_page_is_valid_init(mi_page_t* page) { - mi_assert_internal(page->block_size > 0); + mi_assert_internal(page->xblock_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); + const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -89,10 +91,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif - mi_block_t* tfree = mi_tf_block(page->thread_free); + mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); - size_t tfree_count = mi_page_list_count(page, tfree); - mi_assert_internal(tfree_count <= page->thread_freed + 1); + //size_t tfree_count = mi_page_list_count(page, tfree); + //mi_assert_internal(tfree_count <= page->thread_freed + 1); size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == 
page->capacity); @@ -105,14 +107,14 @@ bool _mi_page_is_valid(mi_page_t* page) { #if MI_SECURE mi_assert_internal(page->key != 0); #endif - if (page->heap!=NULL) { + if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); if (segment->page_kind != MI_PAGE_HUGE) { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); - mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } return true; @@ -124,20 +126,20 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid mi_thread_free_t tfreex; mi_delayed_t old_delay; do { - tfree = mi_atomic_read_relaxed(&page->thread_free); + tfree = mi_atomic_read(&page->xthread_free); tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { - mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + // mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { break; // avoid atomic operation if already equal } else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { - break; // leave never set + break; // leave never-delayed flag set } - } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } /* ----------------------------------------------------------- @@ -154,17 +156,17 @@ static void _mi_page_thread_free_collect(mi_page_t* page) mi_thread_free_t tfree; mi_thread_free_t tfreex; do { - tfree = page->thread_free; + tfree = mi_atomic_read_relaxed(&page->xthread_free); head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); // return if the list is empty if (head == NULL) return; // find the tail -- also to get a proper count (without data races) - uintptr_t max_count = page->capacity; // cannot collect more than capacity - uintptr_t count = 1; + uint32_t max_count = page->capacity; // cannot collect more than capacity + uint32_t count = 1; mi_block_t* tail = head; mi_block_t* next; while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { @@ -182,7 +184,6 @@ static void _mi_page_thread_free_collect(mi_page_t* page) page->local_free = head; // update counts now - mi_atomic_subu(&page->thread_freed, count); page->used -= count; } @@ -190,7 +191,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_assert_internal(page!=NULL); // collect the thread free list - if (force || mi_tf_block(page->thread_free) != NULL) { // quick test to avoid an atomic operation + if (force || mi_page_thread_free(page) != NULL) { 
// quick test to avoid an atomic operation _mi_page_thread_free_collect(page); } @@ -228,15 +229,16 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); - mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE); - _mi_page_free_collect(page,false); - mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); + mi_assert_internal(mi_page_thread_free_flag(page) == MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, heap); + mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); - mi_assert_internal(page->heap != NULL); - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + // _mi_page_free_collect(page,false); // no need, as it is just done before reclaim + mi_assert_internal(mi_page_heap(page)!= NULL); mi_assert_expensive(_mi_page_is_valid(page)); } @@ -270,8 +272,8 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { // otherwise allocate the page page = mi_page_fresh_alloc(heap, pq, pq->block_size); if (page==NULL) return NULL; - mi_assert_internal(pq->block_size==page->block_size); - mi_assert_internal(pq==mi_page_queue(heap,page->block_size)); + mi_assert_internal(pq->block_size==mi_page_block_size(page)); + mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); return page; } @@ -312,11 +314,9 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false); if (!mi_page_is_in_full(page)) return; - mi_heap_t* heap = page->heap; + mi_heap_t* heap = mi_page_heap(page); mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); @@ -329,10 +329,8 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); if (mi_page_is_in_full(page)) return; - - mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); + mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set } @@ -345,18 +343,17 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); - mi_assert_internal(page->heap != NULL); + mi_assert_internal(mi_page_heap(page) != NULL); -#if MI_DEBUG > 1 - mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); -#endif + mi_heap_t* pheap = mi_page_heap(page); // remove from our page list - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_segments_tld_t* segments_tld = 
&pheap->tld->segments; mi_page_queue_remove(pq, page); // page is no longer associated with our heap - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, NULL); #if MI_DEBUG>1 // check there are no references left.. @@ -366,7 +363,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #endif // and abandon it - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == NULL); _mi_segment_page_abandon(page,segments_tld); } @@ -377,33 +374,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(mi_page_all_free(page)); - #if MI_DEBUG>1 - // check if we can safely free - mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE); - free = mi_atomic_exchange(&page->thread_free, free); - mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING); - #endif + mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); + // no more aligned blocks in here mi_page_set_has_aligned(page, false); - // account for huge pages here - // (note: no longer necessary as huge pages are always abandoned) - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); - } - else { - _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); - } - } - // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments; mi_page_queue_remove(pq, page); // and free it - mi_assert_internal(page->heap == NULL); + mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } @@ -427,7 +409,7 @@ void _mi_page_retire(mi_page_t* page) { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? 
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 4; @@ -469,15 +451,15 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) -static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); - const size_t bsize = page->block_size; // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -491,7 +473,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice for (size_t i = 0; i < slice_count; i++) { - blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend); counts[i] = slice_extend; } counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) @@ -526,7 +508,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co page->free = free_start; } -static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE <= 2) @@ -534,12 +516,13 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - const size_t bsize = page->block_size; - mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); + + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); @@ -581,8 +564,9 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count + const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size); size_t extend = page->reserved - page->capacity; - size_t max_extend = (page->block_size >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)page->block_size); + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? 
MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; if (extend > max_extend) { @@ -596,20 +580,20 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // commit on-demand for large and huge pages? if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - uint8_t* start = page_start + (page->capacity * page->block_size); - _mi_mem_commit(start, extend * page->block_size, NULL, &tld->os); + uint8_t* start = page_start + (page->capacity * bsize); + _mi_mem_commit(start, extend * bsize, NULL, &tld->os); } // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, extend, &tld->stats ); + mi_page_free_list_extend(page, bsize, extend, &tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, extend, &tld->stats); + mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(tld->stats.page_committed, extend * page->block_size); + mi_stat_increase(tld->stats.page_committed, extend * bsize); // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { @@ -625,9 +609,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert(segment != NULL); mi_assert_internal(block_size > 0); // set fields + mi_page_set_heap(page, heap); size_t page_size; _mi_segment_page_start(segment, page, block_size, &page_size, NULL); - page->block_size = block_size; + page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST @@ -639,14 +624,14 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); - mi_assert_internal(page->thread_free == 0); - mi_assert_internal(page->thread_freed == 0); + mi_assert_internal(page->xthread_free == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->key != 0); + mi_assert_internal(page->key[1] != 0); + mi_assert_internal(page->key[2] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); @@ -664,34 +649,19 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; - while( page != NULL) + while (page != NULL) { mi_page_t* next = page->next; // remember next count++; // 0. collect freed blocks by us and other threads - _mi_page_free_collect(page,false); + _mi_page_free_collect(page, false); // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. 
- // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. Try to extend @@ -704,20 +674,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 3. If the page is completely full, move it to the `mi_pages_full` // queue so we don't visit long-lived pages too often. mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); - mi_page_to_full(page,pq); + mi_page_to_full(page, pq); page = next; } // for each page - mi_stat_counter_increase(heap->tld->stats.searches,count); - - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } + mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { page = mi_page_fresh(heap, pq); @@ -729,11 +691,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // finally collect retired pages - _mi_heap_collect_retired(heap,false); + _mi_heap_collect_retired(heap, false); return page; } + // Find a page with free blocks of `size`. static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); @@ -794,14 +757,15 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { + const size_t bsize = mi_page_block_size(page); mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(page->block_size == block_size); + mi_assert_internal(bsize >= size); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_page_set_heap(page, NULL); - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_increase(&heap->tld->stats.giant, block_size); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); } @@ -849,7 +813,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept if (page == NULL) return NULL; // out of memory mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(page->block_size >= size); + mi_assert_internal(mi_page_block_size(page) >= size); // and try again, this time succeeding! (i.e. this should never recurse) return _mi_page_malloc(heap, page, size); diff --git a/src/segment.c b/src/segment.c index 97859fa9..4fb3e28b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -208,8 +208,8 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m mi_assert_internal(size <= psize); size_t reset_size = (size == 0 || size > psize ? 
psize : size); if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - mi_assert_internal(page->block_size > 0); - reset_size = page->capacity * page->block_size; + mi_assert_internal(page->xblock_size > 0); + reset_size = page->capacity * mi_page_block_size(page); } _mi_mem_reset(start, reset_size, tld->os); } @@ -223,8 +223,8 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); size_t unreset_size = (size == 0 || size > psize ? psize : size); if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - mi_assert_internal(page->block_size > 0); - unreset_size = page->capacity * page->block_size; + mi_assert_internal(page->xblock_size > 0); + unreset_size = page->capacity * mi_page_block_size(page); } bool is_zero = false; _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); @@ -255,7 +255,7 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -278,7 +278,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size==0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -605,7 +605,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); - size_t inuse = page->capacity * page->block_size; + size_t inuse = page->capacity * mi_page_block_size(page); _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -707,6 +707,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment)); segment->abandoned++; @@ -765,9 +767,12 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { // if everything free by now, free the page mi_segment_page_clear(segment,page,tld); From ad32eb1dfb2b73ed8eaecfdc14e01cbbf43d05b2 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 17:57:19 -0800 Subject: [PATCH 099/104] eager collect on page reclamation --- src/page.c | 10 +++------- src/segment.c | 7 +++++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/page.c b/src/page.c index 40aec0c6..02f10238 
100644 --- a/src/page.c +++ b/src/page.c @@ -229,16 +229,12 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(mi_page_heap(page) == NULL); + mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); - mi_assert_internal(mi_page_thread_free_flag(page) == MI_NEVER_DELAYED_FREE); - mi_page_set_heap(page, heap); mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); - mi_page_queue_push(heap, pq, page); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - // _mi_page_free_collect(page,false); // no need, as it is just done before reclaim - mi_assert_internal(mi_page_heap(page)!= NULL); + mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } diff --git a/src/segment.c b/src/segment.c index 4fb3e28b..d27a7c13 100644 --- a/src/segment.c +++ b/src/segment.c @@ -772,13 +772,16 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + // set the heap again and allow delayed free again + mi_page_set_heap(page, heap); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { - // if everything free by now, free the page + // if everything free already, clear the page directly mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it into the heap _mi_page_reclaim(heap,page); } } From 9629d731888f64db99e43016c916268a73a5f02f Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 18:07:29 -0800 Subject: [PATCH 100/104] fix options --- src/options.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/options.c b/src/options.c index ce21309d..f1d8205f 100644 --- a/src/options.c +++ b/src/options.c @@ -67,6 +67,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds From b8072aaacb581b9655545b9960456c239b7c59af Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 03:54:51 -0800 Subject: [PATCH 101/104] fix debug build --- src/heap.c | 34 +++++++++++++++++----------------- src/segment.c | 7 +++++-- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/heap.c b/src/heap.c index 9f2a4457..12aa0840 100644 --- a/src/heap.c +++ b/src/heap.c @@ -56,7 +56,8 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ mi_assert_expensive(_mi_page_is_valid(page)); return true; } - +#endif +#if MI_DEBUG>=3 static bool 

From b8072aaacb581b9655545b9960456c239b7c59af Mon Sep 17 00:00:00 2001
From: daan
Date: Thu, 16 Jan 2020 03:54:51 -0800
Subject: [PATCH 101/104] fix debug build

---
 src/heap.c    | 34 +++++++++++++++++-----------------
 src/segment.c |  7 +++++--
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/heap.c b/src/heap.c
index 9f2a4457..12aa0840 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -56,7 +56,8 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   mi_assert_expensive(_mi_page_is_valid(page));
   return true;
 }
-
+#endif
+#if MI_DEBUG>=3
 static bool mi_heap_is_valid(mi_heap_t* heap) {
   mi_assert_internal(heap!=NULL);
   mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL);
@@ -111,7 +112,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
 {
   if (!mi_heap_is_initialized(heap)) return;
   _mi_deferred_free(heap, collect > NORMAL);
-  
+
   // collect (some) abandoned pages
   if (collect >= NORMAL && !heap->no_reclaim) {
     if (collect == NORMAL) {
@@ -123,8 +124,8 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
     collect == FORCE
   #else
     collect >= FORCE
-  #endif
-    && _mi_is_main_thread() && mi_heap_is_backing(heap))
+  #endif
+    && _mi_is_main_thread() && mi_heap_is_backing(heap))
   {
     // the main thread is abandoned, try to free all abandoned segments.
     // if all memory is freed by now, all segments should be freed.
@@ -135,19 +136,19 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   // if abandoning, mark all pages to no longer add to delayed_free
   if (collect == ABANDON) {
     //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) {
-    //  _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
-    //}  
+    //  _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
+    //}
     mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
   }
 
-  // free thread delayed blocks. 
+  // free thread delayed blocks.
   // (if abandoning, after this there are no more local references into the pages.)
   _mi_heap_delayed_free(heap);
 
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
   mi_assert_internal( collect != ABANDON || heap->thread_delayed_free == NULL );
-  
+
   // collect segment caches
   if (collect >= FORCE) {
     _mi_segment_thread_collect(&heap->tld->segments);
@@ -177,7 +178,7 @@ void mi_collect(bool force) mi_attr_noexcept {
 ----------------------------------------------------------- */
 
 mi_heap_t* mi_heap_get_default(void) {
-  mi_thread_init();  
+  mi_thread_init();
   return mi_get_default_heap();
 }
 
@@ -198,7 +199,7 @@ mi_heap_t* mi_heap_new(void) {
   heap->tld = bheap->tld;
   heap->thread_id = _mi_thread_id();
   _mi_random_split(&bheap->random, &heap->random);
-  heap->cookie = _mi_heap_random_next(heap) | 1;  
+  heap->cookie = _mi_heap_random_next(heap) | 1;
   heap->key[0] = _mi_heap_random_next(heap);
   heap->key[1] = _mi_heap_random_next(heap);
   heap->no_reclaim = true;  // don't reclaim abandoned pages or otherwise destroy is unsafe
@@ -226,7 +227,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
 static void mi_heap_free(mi_heap_t* heap) {
   mi_assert_internal(mi_heap_is_initialized(heap));
   if (mi_heap_is_backing(heap)) return; // dont free the backing heap
-  
+
   // reset default
   if (mi_heap_is_default(heap)) {
     _mi_heap_set_default_direct(heap->tld->heap_backing);
@@ -247,7 +248,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   UNUSED(pq);
 
   // ensure no more thread_delayed_free will be added
-  _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);  
+  _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
 
   // stats
   const size_t bsize = mi_page_block_size(page);
@@ -311,7 +312,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   if (from==NULL || from->page_count == 0) return;
 
   // unfull all full pages in the `from` heap
-  mi_page_t* page = from->pages[MI_BIN_FULL].first;  
+  mi_page_t* page = from->pages[MI_BIN_FULL].first;
   while (page != NULL) {
     mi_page_t* next = page->next;
     _mi_page_unfull(page);
     page = next;
   }
@@ -323,7 +324,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   _mi_heap_delayed_free(from);
 
   // transfer all pages by appending the queues; this will set
-  // a new heap field which is ok as all pages are unfull'd and thus 
+  // a new heap field which is ok as all pages are unfull'd and thus
   // other threads won't access this field anymore (see `mi_free_block_mt`)
   for (size_t i = 0; i < MI_BIN_FULL; i++) {
     mi_page_queue_t* pq = &heap->pages[i];
@@ -334,7 +335,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   }
   mi_assert_internal(from->thread_delayed_free == NULL);
   mi_assert_internal(from->page_count == 0);
-  
+
   // and reset the `from` heap
   mi_heap_reset_pages(from);
 }
@@ -362,7 +363,7 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
   mi_assert(mi_heap_is_initialized(heap));
   if (!mi_heap_is_initialized(heap)) return NULL;
   mi_assert_expensive(mi_heap_is_valid(heap));
-  mi_heap_t* old = mi_get_default_heap();  
+  mi_heap_t* old = mi_get_default_heap();
   _mi_heap_set_default_direct(heap);
   return old;
 }
@@ -534,4 +535,3 @@ bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_vis
   mi_visit_blocks_args_t args = { visit_blocks, visitor, arg };
   return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args);
 }
-
diff --git a/src/segment.c b/src/segment.c
index ee1de005..3f99c790 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -135,7 +135,7 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) {
 }
 
 
-#if (MI_DEBUG>=3)
+#if (MI_DEBUG>=2)
 static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) {
   mi_page_t* p = tld->pages_reset.first;
   while (p != NULL) {
@@ -144,7 +144,9 @@ static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tl
   }
   return false;
 }
+#endif
 
+#if (MI_DEBUG>=3)
 static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment != NULL);
   mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
@@ -169,6 +171,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
 #endif
 
 static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) {
+  mi_assert_internal(page != NULL);
   if (page->next != NULL || page->prev != NULL) {
     mi_assert_internal(mi_pages_reset_contains(page, tld));
     return false;
@@ -1052,6 +1055,6 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_
   mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
   mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
   mi_reset_delayed(tld);
-  mi_assert_internal(mi_page_not_in_queue(page, tld));
+  mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
   return page;
 }
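
The debug-build fix above works by guarding each assertion helper at the MI_DEBUG level that actually uses it, so a level-2 build no longer defines level-3-only helpers (which would otherwise trip unused-function warnings), and by allowing page == NULL in the final assertion. Below is a generic, self-contained illustration of the guard pattern; it is not mimalloc code, and MY_DEBUG is a made-up stand-in for MI_DEBUG.

#include <stdbool.h>
#include <stdio.h>

#ifndef MY_DEBUG
#define MY_DEBUG 2        // hypothetical debug level, analogous to MI_DEBUG
#endif

#if (MY_DEBUG >= 2)
static bool cheap_check(int x) { return x >= 0; }            // used by regular asserts
#endif

#if (MY_DEBUG >= 3)
static bool expensive_check(int x) { return (x % 7) != 3; }  // used only by expensive asserts
#endif

int main(void) {
#if (MY_DEBUG >= 2)
  printf("cheap_check: %d\n", cheap_check(42));
#endif
#if (MY_DEBUG >= 3)
  printf("expensive_check: %d\n", expensive_check(42));
#endif
  return 0;
}
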

From 8d8f355ed0190702edcce7d16d9fdad7466ae2b7 Mon Sep 17 00:00:00 2001
From: daan
Date: Thu, 16 Jan 2020 11:25:02 -0800
Subject: [PATCH 102/104] add option to reset eagerly when a segment is abandoned

---
 include/mimalloc.h |  1 +
 src/options.c      |  1 +
 src/segment.c      | 19 ++++++++++++-------
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index fe09c7f2..e45b7e4d 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -273,6 +273,7 @@ typedef enum mi_option_e {
   mi_option_reserve_huge_os_pages,
   mi_option_segment_cache,
   mi_option_page_reset,
+  mi_option_abandoned_page_reset,
   mi_option_segment_reset,
   mi_option_eager_commit_delay,
   mi_option_reset_delay,
diff --git a/src/options.c b/src/options.c
index f1d8205f..c12c77e0 100644
--- a/src/options.c
+++ b/src/options.c
@@ -68,6 +68,7 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
   { 0, UNINIT, MI_OPTION(segment_cache) },       // cache N segments per thread
   { 0, UNINIT, MI_OPTION(page_reset) },          // reset page memory on free
+  { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates
   { 0, UNINIT, MI_OPTION(segment_reset) },       // reset segment memory on free (needs eager commit)
   { 0, UNINIT, MI_OPTION(eager_commit_delay) },  // the first N segments per thread are not eagerly committed
   { 100, UNINIT, MI_OPTION(reset_delay) },       // reset delay in milli-seconds
diff --git a/src/segment.c b/src/segment.c
index 3f99c790..ea030d7a 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -326,12 +326,15 @@ static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) {
   page->used = 0;
 }
 
-static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, mi_segments_tld_t* tld) {
-  if (segment->mem_is_fixed) return;
+static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool force_reset, mi_segments_tld_t* tld) {
+  if (segment->mem_is_fixed) return; // never reset in huge OS pages
   for (size_t i = 0; i < segment->capacity; i++) {
     mi_page_t* page = &segment->pages[i];
     if (!page->segment_in_use && !page->is_reset) {
       mi_pages_reset_remove(page, tld);
+      if (force_reset) {
+        mi_page_reset(segment, page, 0, tld);
+      }
     }
     else {
       mi_assert_internal(mi_page_not_in_queue(page,tld));
@@ -668,9 +671,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
 
 
 static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
-  UNUSED(force);
-  mi_assert(segment != NULL);
-  mi_pages_reset_remove_all_in_segment(segment, tld);
+  UNUSED(force);
+  mi_assert(segment != NULL);
+  // note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse)
+  bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset));
+  mi_pages_reset_remove_all_in_segment(segment, force_reset, tld);
   mi_segment_remove_from_free_queue(segment,tld);
 
   mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment));
@@ -840,8 +845,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_expensive(mi_segment_is_valid(segment,tld));
 
   // remove the segment from the free page queue if needed
-  mi_reset_delayed(tld);
-  mi_pages_reset_remove_all_in_segment(segment, tld); // do not force reset on free pages in an abandoned segment, as it is already done in segment_thread_collect
+  mi_reset_delayed(tld);
+  mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld);
   mi_segment_remove_from_free_queue(segment, tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
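
A hedged usage sketch for the new option, not part of the patch: enabling abandoned_page_reset asks the allocator to reset the free pages of a segment at the moment a terminating thread abandons it, rather than keeping them resident until the segment is reused. The call below uses the public option API; the exact effect on resident memory depends on the OS.

#include <mimalloc.h>

int main(void) {
  // Equivalent to setting MIMALLOC_ABANDONED_PAGE_RESET=1 in the environment.
  mi_option_set(mi_option_abandoned_page_reset, 1);

  // ... create and join worker threads here; when a worker exits and its
  // segments are abandoned, their free pages are now reset eagerly.
  return 0;
}
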

From 4e91eab8fca9dfa95f74a7205f8f216dd9f22f02 Mon Sep 17 00:00:00 2001
From: daan
Date: Thu, 16 Jan 2020 14:12:37 -0800
Subject: [PATCH 103/104] specialize mi_mallocn for count=1

---
 src/alloc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/alloc.c b/src/alloc.c
index 621fb0db..be63f86a 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -443,7 +443,12 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept {
 // Uninitialized `calloc`
 extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
   size_t total;
-  if (mi_mul_overflow(count,size,&total)) return NULL;
+  if (count==1) {
+    total = size;
+  }
+  else if (mi_mul_overflow(count, size, &total)) {
+    return NULL;
+  }
   return mi_heap_malloc(heap, total);
 }
 

From 24f8bcbc8f4236b2bd37b1c8bfc169ec9a941942 Mon Sep 17 00:00:00 2001
From: daan
Date: Thu, 16 Jan 2020 14:25:09 -0800
Subject: [PATCH 104/104] add explicit calling convention to registered functions

---
 include/mimalloc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index e45b7e4d..de4282da 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -108,10 +108,10 @@ mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize)
 mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept;
 mi_decl_export size_t mi_good_size(size_t size)     mi_attr_noexcept;
 
-typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
+typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
 mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept;
 
-typedef void (mi_output_fun)(const char* msg, void* arg);
+typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg);
 mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept;
 
 mi_decl_export void mi_collect(bool force)       mi_attr_noexcept;
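
With the calling convention spelled out in the typedefs, user callbacks should be declared with the matching convention; this matters mainly for MSVC x86 builds where the default convention can be changed (/Gz, /Gr), while mi_cdecl typically expands to nothing on other targets. A small registration sketch follows; the callback body and the mi_stats_print call are illustrative, not part of the patch.

#include <mimalloc.h>
#include <stdio.h>

// Declared with mi_cdecl so its type matches the updated mi_output_fun typedef.
static void mi_cdecl my_output(const char* msg, void* arg) {
  (void)arg;
  fputs(msg, stderr);          // forward mimalloc messages (stats, warnings) to stderr
}

int main(void) {
  mi_register_output(&my_output, NULL);
  mi_stats_print(NULL);        // output, if any, now goes through my_output
  return 0;
}
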