From 6a80fa3ac1a3ec10b79a651ced93f286abe096c6 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Nov 2022 14:46:52 -0700 Subject: [PATCH 01/88] initial progress on removing alignment limit --- ide/vs2022/mimalloc-override.vcxproj | 2 +- ide/vs2022/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 12 ++++++--- include/mimalloc-types.h | 2 ++ src/alloc.c | 10 +++++--- src/arena.c | 13 +++++----- src/os.c | 37 ++++++++++++++++++++++++++++ src/page.c | 22 ++++++++--------- src/region.c | 21 ++++++++-------- src/segment.c | 36 ++++++++++++++++++--------- test/test-api.c | 8 +++--- 11 files changed, 114 insertions(+), 51 deletions(-) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index d674108b..87b0a1e4 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -123,7 +123,7 @@ true true ../../include - MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=4;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 29f732d1..9081881c 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=3;%(PreprocessorDefinitions); + MI_DEBUG=4;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 23981240..d3e689ae 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -81,9 +81,13 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free th size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); + + // memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); @@ -94,7 +98,7 @@ bool _mi_mem_unprotect(void* addr, size_t size); void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page @@ -107,7 +111,7 @@ void _mi_abandoned_await_readers(void); // "page.c" -void* 
_mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc; +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks void _mi_page_unfull(mi_page_t* page); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index bca0ad61..79d04d2d 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -301,6 +301,8 @@ typedef struct mi_segment_s { size_t memid; // id for the os-level memory manager bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed + size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) + size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) // segment fields _Atomic(struct mi_segment_s*) abandoned_next; diff --git a/src/alloc.c b/src/alloc.c index af255f67..284db29c 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -30,7 +30,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* const block = page->free; if mi_unlikely(block == NULL) { - return _mi_malloc_generic(heap, size, zero); + return _mi_malloc_generic(heap, size, zero, 0); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list @@ -117,14 +117,14 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si } // The main allocation function -extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { +static inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { if mi_likely(size <= MI_SMALL_SIZE_MAX) { return mi_heap_malloc_small_zero(heap, size, zero); } else { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero); // note: size can overflow but it is detected in malloc_generic + void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_assert_internal(p == NULL || mi_usable_size(p) >= size); #if MI_STAT>1 if (p != NULL) { @@ -137,6 +137,10 @@ extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero } } +extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { + return _mi_heap_malloc_zero_ex(heap, size, zero, 0); +} + mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { return _mi_heap_malloc_zero(heap, size, false); } diff --git a/src/arena.c b/src/arena.c index ce716089..5aef95f7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -190,7 +190,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, 
mi_arena_id_t arena_id, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); @@ -201,7 +201,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && + if (alignment <= MI_SEGMENT_ALIGN && align_offset == 0 && size >= MI_ARENA_MIN_OBJ_SIZE && mi_atomic_load_relaxed(&mi_arena_count) > 0) { @@ -256,14 +256,14 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* } *is_zero = true; *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); + void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); if (p != NULL) *is_pinned = *large; return p; } void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t arena_id, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, arena_id, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, arena_id, memid, tld); } @@ -281,16 +281,17 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - _mi_os_free_ex(p, size, all_committed, stats); + _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); } else { // allocated in an arena + mi_assert_internal(align_offset == 0); size_t arena_idx; size_t bitmap_idx; mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); diff --git a/src/os.c b/src/os.c index fe9c2959..57b34a2c 100644 --- a/src/os.c +++ b/src/os.c @@ -840,8 +840,45 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); } +/* ----------------------------------------------------------- + OS aligned allocation with an offset. This is used + for large alignments > MI_SEGMENT_SIZE so we can align + the first page at an offset from the start of the segment. + As we may need to overallocate, we need to free such pointers + using `mi_free_aligned` to use the actual start of the + memory region. 
+----------------------------------------------------------- */ +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) { + mi_assert(offset <= MI_SEGMENT_SIZE); + mi_assert(offset <= size); + mi_assert((alignment % _mi_os_page_size()) == 0); + if (offset > MI_SEGMENT_SIZE) return NULL; + if (offset == 0) { + return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats); + } + else { + const size_t extra = _mi_align_up(offset, alignment) - offset; + const size_t oversize = size + extra; + void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats); + if (start == NULL) return NULL; + void* p = (uint8_t*)start + extra; + mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); + if (commit && extra > _mi_os_page_size()) { + _mi_os_decommit(start, extra, tld_stats); + } + return p; + } +} + +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { + mi_assert(align_offset <= MI_SEGMENT_SIZE); + const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; + void* start = (uint8_t*)p - extra; + _mi_os_free_ex(start, size + extra, was_committed, tld_stats); +} + /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. ----------------------------------------------------------- */ diff --git a/src/page.c b/src/page.c index 26b9c9f1..49662dba 100644 --- a/src/page.c +++ b/src/page.c @@ -252,10 +252,10 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { } // allocate a fresh page from a segment -static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { +static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); mi_assert_internal(pq==NULL||block_size == pq->block_size); - mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); + mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; @@ -272,7 +272,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size // Get a fresh page to use static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap, pq)); - mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0); if (page==NULL) return NULL; mi_assert_internal(pq->block_size==mi_page_block_size(page)); mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); @@ -790,10 +790,10 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex // Because huge pages contain just one block, and the segment contains // just that page, we always treat them as abandoned and any thread // that frees the block can free the whole page and segment directly. 
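A note on the `_mi_os_alloc_aligned_offset` hunk above: the region is over-allocated by `extra = _mi_align_up(offset, alignment) - offset` bytes so that the address `offset` bytes past the returned pointer lands on an alignment boundary (the returned pointer itself is generally not aligned), and the unused prefix is decommitted when it spans at least a page. A self-contained sketch of just that arithmetic; `align_up` here is a local stand-in for mimalloc's internal `_mi_align_up`:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    // round `n` up to a multiple of the power-of-two `align`
    static size_t align_up(size_t n, size_t align) {
      return (n + align - 1) & ~(align - 1);
    }

    int main(void) {
      const size_t alignment = (size_t)1 << 22;  // e.g. a 4 MiB page alignment
      const size_t offset    = (size_t)1 << 16;  // first page sits 64 KiB into the segment
      const size_t extra     = align_up(offset, alignment) - offset;
      // allocating `size + extra` bytes at `alignment` and returning `start + extra`
      // puts `p + offset` exactly on an alignment boundary:
      const uintptr_t start = 3 * alignment;     // any alignment-aligned base address
      const uintptr_t p     = start + extra;     // what the offset variant returns
      assert((p + offset) % alignment == 0);
      return 0;
    }
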
-static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { +static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE); - mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); + mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size,page_alignment); if (page != NULL) { const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already mi_assert_internal(bsize >= size); @@ -818,16 +818,16 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { // Allocate a page // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { +static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept { // huge allocation? const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) ) { + if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); return NULL; } else { - return mi_huge_page_alloc(heap,size); + return mi_huge_page_alloc(heap,size,huge_alignment); } } else { @@ -839,7 +839,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. 
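The `req_size = size - MI_PADDING_SIZE` correction in `mi_find_page` above deserves a line of explanation: when the caller's `size + MI_PADDING_SIZE` wrapped around, the unsigned subtraction inverts the wrap exactly, so the recovered request is still caught by the `PTRDIFF_MAX` check. A small sketch of that invariant, with a hypothetical 16-byte padding standing in for `MI_PADDING_SIZE`:

    #include <assert.h>
    #include <stdint.h>

    #define PADDING 16  // hypothetical stand-in for MI_PADDING_SIZE

    int main(void) {
      size_t huge   = SIZE_MAX - 7;     // an absurdly large request
      size_t padded = huge + PADDING;   // wraps around: padded == 8
      size_t req    = padded - PADDING; // wraps back: req == huge again
      assert(req == huge);
      assert(req > PTRDIFF_MAX);        // so the allocation is rejected
      return 0;
    }
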
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { mi_assert_internal(heap != NULL); @@ -858,10 +858,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexce _mi_heap_delayed_free_partial(heap); // find (or allocate) a page of the right size - mi_page_t* page = mi_find_page(heap, size); + mi_page_t* page = mi_find_page(heap, size, huge_alignment); if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more mi_heap_collect(heap, true /* force */); - page = mi_find_page(heap, size); + page = mi_find_page(heap, size, huge_alignment); } if mi_unlikely(page == NULL) { // out of memory diff --git a/src/region.c b/src/region.c index 8b04387d..c3f2c8a2 100644 --- a/src/region.c +++ b/src/region.c @@ -50,9 +50,9 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats); +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); @@ -181,7 +181,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, bool is_zero = false; bool is_pinned = false; size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); mi_assert_internal(!region_large || region_commit); @@ -190,7 +190,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, const size_t idx = mi_atomic_increment_acq_rel(®ions_count); if (idx >= MI_REGION_MAX) { mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats); _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); return false; } @@ -347,7 +347,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. 
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); @@ -363,7 +363,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l void* p = NULL; size_t arena_memid; const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { + if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); if (p == NULL) { _mi_warning_message("unable to allocate from region: size %zu\n", size); @@ -371,7 +371,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l } if (p == NULL) { // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); + p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); *memid = mi_memid_create_from_arena(arena_memid); } @@ -391,7 +391,7 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { +void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; @@ -402,10 +402,11 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re mem_region_t* region; if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, full_commit, tld->stats); + _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats); } else { // allocated in a region + mi_assert_internal(align_offset == 0); mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); @@ -469,7 +470,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { mi_atomic_store_release(®ion->info, (size_t)0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, 0, 0, arena_memid, (~commit == 0), tld->stats); } } } diff --git a/src/segment.c b/src/segment.c index 68174bb2..225ecd2a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -475,7 +475,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false; } - _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); + _mi_mem_free(segment, segment_size, segment->mem_alignment, segment->mem_align_offset, segment->memid, 
fully_committed, any_reset, tld->os); } // called by threads that are terminating to free cached segments @@ -495,7 +495,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // the segment parameter is non-null if it came from our cache mi_assert_internal(segment==NULL || (required==0 && page_kind <= MI_PAGE_LARGE)); @@ -507,7 +507,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ capacity = 1; } else { - mi_assert_internal(required == 0); + mi_assert_internal(required == 0 && page_alignment == 0); size_t page_size = (size_t)1 << page_shift; capacity = MI_SEGMENT_SIZE / page_size; mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); @@ -571,7 +571,13 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ size_t memid; bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + size_t align_offset = 0; + size_t alignment = MI_SEGMENT_SIZE; + if (page_alignment > 0) { + align_offset = pre_size; + alignment = page_alignment; + } + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { // ensure the initial info is committed @@ -581,7 +587,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (commit_zero) is_zero = true; if (!ok) { // commit failed; we cannot touch the memory: free the segment directly and return `NULL` - _mi_mem_free(segment, MI_SEGMENT_SIZE, memid, false, false, os_tld); + _mi_mem_free(segment, segment_size, alignment, align_offset, memid, false, false, os_tld); return NULL; } } @@ -589,6 +595,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->memid = memid; segment->mem_is_pinned = (mem_large || is_pinned); segment->mem_is_committed = commit; + segment->mem_alignment = alignment; + segment->mem_align_offset = align_offset; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); @@ -637,8 +645,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ return segment; } -static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_init(NULL, required, page_kind, page_shift, tld, os_tld); +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_init(NULL, required, page_kind, page_shift, page_alignment, tld, os_tld); } static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { @@ -1169,7 +1177,7 @@ static 
mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s return segment; } // 2. otherwise allocate a fresh segment - return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); + return mi_segment_alloc(0, page_kind, page_shift, 0, tld, os_tld); } @@ -1241,15 +1249,16 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size return page; } -static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT, page_alignment, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); segment->thread_id = 0; // huge pages are immediately abandoned mi_segments_track_size(-(long)segment->segment_size, tld); mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); + mi_assert_internal(page_alignment == 0 || _mi_is_aligned(_mi_page_start(segment, page, NULL),page_alignment)); return page; } @@ -1285,8 +1294,11 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block Page allocation ----------------------------------------------------------- */ -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; + if (page_alignment <= MI_ALIGNMENT_MAX) { + page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); + } if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } @@ -1297,7 +1309,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); diff --git a/test/test-api.c b/test/test-api.c index 3c2ef7e4..650056e2 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -161,10 +161,12 @@ int main(void) { result = ok; }; CHECK_BODY("malloc-aligned7") { - void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX); mi_free(p); - }; + void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX); + mi_free(p); + }; CHECK_BODY("malloc-aligned8") { - void* p = mi_malloc_aligned(1024,2*MI_ALIGNMENT_MAX); mi_free(p); + void* p = mi_malloc_aligned(1024,2*MI_ALIGNMENT_MAX); + mi_free(p); }; CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); From 4b91ff760ddd9c7ed7d4e2ddf33d987f81779f58 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Nov 2022 16:40:42 -0700 Subject: [PATCH 02/88] initial working large alignment --- include/mimalloc-internal.h | 6 +++-- include/mimalloc.h | 2 +- src/alloc-aligned.c | 50 ++++++++++++++++++++++++++----------- src/alloc.c | 3 ++- src/page.c | 2 +- src/region.c | 4 +-- src/segment.c | 28 ++++++++++++++++----- 7 files changed, 67 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d3e689ae..6b4b86a2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -147,6 +147,7 @@ mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); @@ -426,7 +427,8 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) { // Segment that contains the pointer static inline mi_segment_t* _mi_ptr_segment(const void* p) { // mi_assert_internal(p != NULL); - return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); + if (p == NULL) return NULL; + return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); } // Segment belonging to a page @@ -440,7 +442,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { static inline size_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && (size_t)diff <= MI_SEGMENT_SIZE /* for huge alignment it can be equal */); size_t idx = (size_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); diff --git a/include/mimalloc.h b/include/mimalloc.h index dec0fc0c..17fd1c60 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -166,7 +166,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
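For orientation, this is the user-facing API the series extends; as the header comment below notes, `alignment` follows `size`, the reverse of `aligned_alloc`. A hedged usage sketch (the 8 MiB alignment is an arbitrary example; before this series anything above `MI_ALIGNMENT_MAX` failed with an error, afterwards it is served from a dedicated huge page):

    #include <mimalloc.h>
    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      const size_t align = 8 * 1024 * 1024;      // 8 MiB, well above MI_ALIGNMENT_MAX
      void* p = mi_malloc_aligned(1024, align);  // mimalloc order: size, then alignment
      assert(p == NULL || ((uintptr_t)p % align) == 0);
      mi_free(p);
      return 0;
    }
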
// -------------------------------------------------------------------------------------
-#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment is 1MiB
+#define MI_ALIGNMENT_MAX (2*1024*1024UL) // maximum supported alignment inside a segment is 2MiB; larger alignments go through a dedicated huge page
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 5672078e..f4ddbf99 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -18,41 +18,59 @@ terms of the MIT license. A copy of the license can be found in the file
 static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
 {
   mi_assert_internal(size <= PTRDIFF_MAX);
-  mi_assert_internal(alignment!=0 && _mi_is_power_of_two(alignment) && alignment <= MI_ALIGNMENT_MAX);
+  mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
 
-  const uintptr_t align_mask = alignment-1;  // for any x, `(x & align_mask) == (x % alignment)`
+  const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
   const size_t padsize = size + MI_PADDING_SIZE;
 
   // use regular allocation if it is guaranteed to fit the alignment constraints
-  if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) {
+  if (offset == 0 && alignment <= padsize && padsize <= MI_MEDIUM_OBJ_SIZE_MAX && (padsize & align_mask) == 0) {
     void* p = _mi_heap_malloc_zero(heap, size, zero);
     mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
     return p;
   }
-
-  // otherwise over-allocate
-  const size_t oversize = size + alignment - 1;
-  void* p = _mi_heap_malloc_zero(heap, oversize, zero);
-  if (p == NULL) return NULL;
+
+  void* p;
+  size_t oversize;
+  if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
+    // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
+    if mi_unlikely(offset != 0) {
+      // todo: cannot support offset alignment for very large alignments yet
+      #if MI_DEBUG > 0
+      _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
+      #endif
+      return NULL;
+    }
+    oversize = size + MI_SEGMENT_SIZE - 1;
+    p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment);
+    if (p == NULL) return NULL;
+    //mi_assert_internal(_mi_is_aligned(p, alignment));
+  }
+  else {
+    // otherwise over-allocate
+    oversize = size + alignment - 1;
+    p = _mi_heap_malloc_zero(heap, oversize, zero);
+    if (p == NULL) return NULL;
+  }
 
   // .. and align within the allocation
   uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask);
   mi_assert_internal(adjust <= alignment);
   void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); - mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); - + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + #if MI_TRACK_ENABLED if (p != aligned_p) { mi_track_free(p); - mi_track_malloc(aligned_p,size,zero); + mi_track_malloc(aligned_p, size, zero); } else { - mi_track_resize(aligned_p,oversize,size); + mi_track_resize(aligned_p, oversize, size); } #endif - return aligned_p; + return aligned_p; } // Primitive aligned allocation @@ -60,18 +78,20 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. mi_assert(alignment > 0); - if mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) + if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } + /* if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); #endif return NULL; } + */ if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); @@ -82,7 +102,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check // try first if there happens to be a small block available with just the right alignment - if mi_likely(padsize <= MI_SMALL_SIZE_MAX) { + if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; if mi_likely(page->free != NULL && is_aligned) diff --git a/src/alloc.c b/src/alloc.c index 284db29c..9ecb9f30 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -117,8 +117,9 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si } // The main allocation function -static inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { +inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { if mi_likely(size <= MI_SMALL_SIZE_MAX) { + mi_assert_internal(huge_alignment == 0); return mi_heap_malloc_small_zero(heap, size, zero); } else { diff --git a/src/page.c b/src/page.c index 49662dba..5e2ec826 100644 --- a/src/page.c +++ b/src/page.c @@ -792,7 +792,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex // that frees the block can free the whole page and segment directly. 
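The `adjust` computation in `mi_heap_malloc_zero_aligned_at_fallback` above bumps the raw block to the next alignment boundary; when `p + offset` is already aligned, `adjust` comes out as `alignment` and is dropped. A standalone sketch of that arithmetic (mirroring the fallback's logic, not mimalloc code itself):

    #include <assert.h>
    #include <stdint.h>

    // align the address at `p + offset`, mirroring the fallback's adjust logic
    static uintptr_t aligned_at(uintptr_t p, size_t offset, size_t alignment) {
      const uintptr_t mask = alignment - 1;           // alignment is a power of two
      uintptr_t adjust = alignment - ((p + offset) & mask);
      return (adjust == alignment ? p : p + adjust);  // already aligned: no shift
    }

    int main(void) {
      assert(aligned_at(4096, 0, 4096) == 4096);      // aligned: returned as-is
      assert(aligned_at(4100, 0, 4096) == 8192);      // bumped to the next boundary
      assert((aligned_at(5000, 24, 4096) + 24) % 4096 == 0); // offset variant
      return 0;
    }
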
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE); + mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size,page_alignment); if (page != NULL) { const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already diff --git a/src/region.c b/src/region.c index c3f2c8a2..ea376aa4 100644 --- a/src/region.c +++ b/src/region.c @@ -376,7 +376,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, } if (p != NULL) { - mi_assert_internal((uintptr_t)p % alignment == 0); + mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed #endif @@ -470,7 +470,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { mi_atomic_store_release(®ion->info, (size_t)0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, 0, 0, arena_memid, (~commit == 0), tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats); } } } diff --git a/src/segment.c b/src/segment.c index 225ecd2a..577fa4fd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -503,7 +503,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // calculate needed sizes first size_t capacity; if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); + mi_assert_internal(page_shift == MI_SEGMENT_SHIFT + 1 && required > 0); capacity = 1; } else { @@ -574,8 +574,9 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ size_t align_offset = 0; size_t alignment = MI_SEGMENT_SIZE; if (page_alignment > 0) { - align_offset = pre_size; alignment = page_alignment; + align_offset = _mi_align_up( pre_size, MI_SEGMENT_SIZE ); + segment_size += (align_offset - pre_size); } segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate @@ -1251,14 +1252,23 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT, page_alignment, tld, os_tld); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= size); segment->thread_id = 0; // huge pages are immediately abandoned mi_segments_track_size(-(long)segment->segment_size, tld); mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); - mi_assert_internal(page_alignment == 0 || _mi_is_aligned(_mi_page_start(segment, page, NULL),page_alignment)); +#if MI_DEBUG > 3 + if (page_alignment > 0) { + size_t psize; + size_t pre_size; + void* p = _mi_segment_page_start(segment, page, 0, &psize, &pre_size); + void* aligned_p = (void*)_mi_align_up((uintptr_t)p, page_alignment); + mi_assert_internal(page_alignment == 0 || _mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(page_alignment == 0 || psize - ((uint8_t*)aligned_p - (uint8_t*)p) >= size); + } +#endif return page; } @@ -1296,10 +1306,16 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (page_alignment <= MI_ALIGNMENT_MAX) { + if mi_unlikely(page_alignment > MI_ALIGNMENT_MAX) { + mi_assert_internal(_mi_is_power_of_two(page_alignment)); + mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); + //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0); + if (page_alignment < MI_SEGMENT_SIZE) { + page_alignment = MI_SEGMENT_SIZE; + } page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld); } - if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { + else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { From a200291ae576e1e766be8526c0d058bd9766d480 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 08:26:17 -0800 Subject: [PATCH 03/88] further progress on removing aligned limit --- src/alloc-aligned.c | 14 +++++++------- src/page.c | 4 ++-- test/test-api.c | 21 +++++++++++++++++++-- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index f4ddbf99..63acd58c 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -29,22 +29,21 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; } - + void* p; size_t oversize; if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) if mi_unlikely(offset != 0) { // todo: cannot support offset alignment for very large alignments yet - #if MI_DEBUG > 0 +#if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); - #endif +#endif return NULL; } - oversize = size + MI_SEGMENT_SIZE - 1; - p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); + oversize = (size <= MI_SMALL_SIZE_MAX ? 
MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); + p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block if (p == NULL) return NULL; - //mi_assert_internal(_mi_is_aligned(p, alignment)); } else { // otherwise over-allocate @@ -57,7 +56,8 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); mi_assert_internal(adjust <= alignment); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); - if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); + if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); diff --git a/src/page.c b/src/page.c index 5e2ec826..6b54eb2c 100644 --- a/src/page.c +++ b/src/page.c @@ -262,7 +262,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size } // a fresh page was found, initialize it mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, heap->tld); + mi_page_init(heap, page, (pq == NULL ? MI_HUGE_BLOCK_SIZE : block_size), heap->tld); mi_heap_stat_increase(heap, pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); @@ -643,7 +643,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_track_mem_noaccess(page_start,page_size); page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); - page->reserved = (uint16_t)(page_size / block_size); + page->reserved = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint16_t)(page_size / block_size) : 1); #ifdef MI_ENCODE_FREELIST page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); diff --git a/test/test-api.c b/test/test-api.c index 650056e2..312b3f1b 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -163,10 +163,27 @@ int main(void) { CHECK_BODY("malloc-aligned7") { void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX); mi_free(p); + result = ((uintptr_t)p % MI_ALIGNMENT_MAX) == 0; }; CHECK_BODY("malloc-aligned8") { - void* p = mi_malloc_aligned(1024,2*MI_ALIGNMENT_MAX); - mi_free(p); + bool ok = true; + for (int i = 0; i < 5 && ok; i++) { + int n = (1 << i); + void* p = mi_malloc_aligned(1024, n * MI_ALIGNMENT_MAX); + ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; + mi_free(p); + } + result = ok; + }; + CHECK_BODY("malloc-aligned9") { + bool ok = true; + for (int i = 0; i < 5 && ok; i++) { + int n = (1 << i); + void* p = mi_malloc_aligned( 2*n*MI_ALIGNMENT_MAX, n*MI_ALIGNMENT_MAX); + ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; + mi_free(p); + } + result = ok; }; CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); From f54e64365f7fe8d933944fa0ad633f14fc1539c2 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 6 Nov 2022 09:44:12 -0800 Subject: [PATCH 04/88] fix assertions with incorrect block size for large alignments --- src/alloc-aligned.c | 1 + src/page.c | 6 ++++-- src/segment.c | 5 +++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 63acd58c..3ce01f5c 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -57,6 +57,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(adjust <= alignment); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); diff --git a/src/page.c b/src/page.c index 6b54eb2c..48ee1f56 100644 --- a/src/page.c +++ b/src/page.c @@ -262,7 +262,9 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size } // a fresh page was found, initialize it mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, (pq == NULL ? MI_HUGE_BLOCK_SIZE : block_size), heap->tld); + mi_assert_internal(pq!=NULL || page->xblock_size != 0); + mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); + mi_page_init(heap, page, (pq==NULL ? mi_page_block_size(page) : block_size), heap->tld); mi_heap_stat_increase(heap, pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); @@ -643,7 +645,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_track_mem_noaccess(page_start,page_size); page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); - page->reserved = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint16_t)(page_size / block_size) : 1); + page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); diff --git a/src/segment.c b/src/segment.c index 577fa4fd..12c9e108 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1269,6 +1269,11 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_assert_internal(page_alignment == 0 || psize - ((uint8_t*)aligned_p - (uint8_t*)p) >= size); } #endif + // for huge pages we initialize the xblock_size as we may + // overallocate to accommodate large alignments. + size_t psize; + _mi_segment_page_start(segment, page, 0, &psize, NULL); + page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : psize); return page; } From d3715132d165d8d1fefc66f3414acb897502c4fd Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 6 Nov 2022 09:52:54 -0800 Subject: [PATCH 05/88] move null ptr check --- include/mimalloc-internal.h | 3 +-- src/alloc.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6b4b86a2..4620fb72 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -426,8 +426,7 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) { // Segment that contains the pointer static inline mi_segment_t* _mi_ptr_segment(const void* p) { - // mi_assert_internal(p != NULL); - if (p == NULL) return NULL; + mi_assert_internal(p != NULL); return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); } diff --git a/src/alloc.c b/src/alloc.c index 9ecb9f30..d10bd586 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -470,8 +470,8 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms } #endif + if mi_unlikely(p == NULL) return NULL; mi_segment_t* const segment = _mi_ptr_segment(p); - if mi_unlikely(segment == NULL) return NULL; // checks also for (p==NULL) #if (MI_DEBUG>0) if mi_unlikely(!mi_is_in_heap_region(p)) { From f788e3c9a3481d425cea81b037d20107541a087f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 14:18:52 -0800 Subject: [PATCH 06/88] add comment --- src/page.c | 6 ++++-- src/segment.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index 48ee1f56..414438b4 100644 --- a/src/page.c +++ b/src/page.c @@ -260,11 +260,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; } - // a fresh page was found, initialize it mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(pq!=NULL || page->xblock_size != 0); mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); - mi_page_init(heap, page, (pq==NULL ? mi_page_block_size(page) : block_size), heap->tld); + // a fresh page was found, initialize it + const size_t full_block_size = (pq == NULL ? 
mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc + mi_assert_internal(full_block_size >= block_size); + mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); diff --git a/src/segment.c b/src/segment.c index 12c9e108..7f80bf8f 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1273,7 +1273,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, // overallocate to accommodate large alignments. size_t psize; _mi_segment_page_start(segment, page, 0, &psize, NULL); - page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : psize); + page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : (uint32_t)psize); return page; } From 3eb616f2bf05f61d857ac3c6464f85a14e48a710 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 6 Nov 2022 14:51:06 -0800 Subject: [PATCH 07/88] fix bug in secure mode where adjustment would make the page size less than the blocksize on the first page of a segment --- src/page.c | 1 + src/segment.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/page.c b/src/page.c index 48ee1f56..f587fc9a 100644 --- a/src/page.c +++ b/src/page.c @@ -646,6 +646,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); + mi_assert_internal(page->reserved > 0); #ifdef MI_ENCODE_FREELIST page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); diff --git a/src/segment.c b/src/segment.c index 12c9e108..3c53db6f 100644 --- a/src/segment.c +++ b/src/segment.c @@ -403,12 +403,14 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - if (pre_size != NULL) *pre_size = adjust; + if (psize - adjust >= block_size) { + if (adjust < block_size) { + p += adjust; + psize -= adjust; + if (pre_size != NULL) *pre_size = adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); } - mi_assert_internal((uintptr_t)p % block_size == 0); } if (page_size != NULL) *page_size = psize; From 562efed54d36e436518fbb0d49d67e74f8a33207 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 20:36:51 -0800 Subject: [PATCH 08/88] fix full SEGMENT_SIZE internal alignment by adding one more slice entry --- include/mimalloc-internal.h | 6 +++--- include/mimalloc-types.h | 2 +- src/alloc-aligned.c | 2 +- src/segment.c | 14 +++++++++----- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 40bb1349..243a45a9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -481,11 +481,11 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + mi_assert_internal(p > segment); ptrdiff_t diff = (uint8_t*)p - 
(uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE /* can be equal for large alignment */); - if (diff == MI_SEGMENT_SIZE) diff--; + mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx < segment->slice_entries); + mi_assert_internal(idx <= segment->slice_entries); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b5931789..b960a460 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -381,7 +381,7 @@ typedef struct mi_segment_s { mi_segment_kind_t kind; _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` - mi_slice_t slices[MI_SLICES_PER_SEGMENT]; + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 76ed0ed7..06ed5272 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -63,7 +63,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } - mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + // mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); #if MI_TRACK_ENABLED diff --git a/src/segment.c b/src/segment.c index c743f02f..f637e7a9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -277,7 +277,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } // and the last entry as well (for coalescing) const mi_slice_t* last = slice + slice->slice_count - 1; - if (last > slice && last < mi_segment_slices_end(segment)) { + if (last > slice && last <= mi_segment_slices_end(segment)) { mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); mi_assert_internal(last->slice_count == 0); mi_assert_internal(last->xblock_size == 1); @@ -709,9 +709,13 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i // and also for the last one (if not set already) (the last one is needed for coalescing) // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) - mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; - if (last < mi_segment_slices_end(segment) && last >= slice) { - last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); + size_t slice_last_index = slice_index + slice_count - 1; + if (slice_last_index >= segment->slice_entries) { + slice_last_index = segment->slice_entries; + } + mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_last_index]; + if (last <= mi_segment_slices_end(segment) && last >= slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_last_index - slice_index)); last->slice_count = 0; last->xblock_size = 1; } @@ -853,7 +857,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, siz if 
(!is_zero) { ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); + memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more } if (!commit_info_still_good) { From 651a99b35d3a70b764524813af4d205333866653 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 20:57:27 -0800 Subject: [PATCH 09/88] refine last slice setting for large alignments --- include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 3 +-- src/segment.c | 29 ++++++++++++++--------------- test/test-api.c | 7 ++++--- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 243a45a9..bb4f50d3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -481,7 +481,7 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - mi_assert_internal(p > segment); + mi_assert_internal(p > (void*)segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b960a460..0cef11da 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -174,8 +174,7 @@ typedef int32_t mi_ssize_t; #endif // Maximum slice offset (15) -// #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) -#define MI_MAX_SLICE_OFFSET ((MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE)) +#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) // Used as a special value to encode block sizes in 32 bits. 
#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB))
diff --git a/src/segment.c b/src/segment.c
index f637e7a9..0a5ac3c7 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -277,7 +277,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
   }
   // and the last entry as well (for coalescing)
   const mi_slice_t* last = slice + slice->slice_count - 1;
-  if (last > slice && last <= mi_segment_slices_end(segment)) {
+  if (last > slice && last < mi_segment_slices_end(segment)) {
     mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t));
     mi_assert_internal(last->slice_count == 0);
     mi_assert_internal(last->xblock_size == 1);
@@ -679,7 +679,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz
 // Note: may still return NULL if committing the memory failed
 static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
   mi_assert_internal(slice_index < segment->slice_entries);
-  mi_slice_t* slice = &segment->slices[slice_index];
+  mi_slice_t* const slice = &segment->slices[slice_index];
   mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1);
   // commit before changing the slice data
@@ -700,22 +700,21 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i
   size_t extra = slice_count-1;
   if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET;
   if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than available entries in the segment->slices
-  slice++;
-  for (size_t i = 1; i <= extra; i++, slice++) {
-    slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i);
-    slice->slice_count = 0;
-    slice->xblock_size = 1;
+
+  mi_slice_t* slice_next = slice + 1;
+  for (size_t i = 1; i <= extra; i++, slice_next++) {
+    slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i);
+    slice_next->slice_count = 0;
+    slice_next->xblock_size = 1;
   }
-  // and also for the last one (if not set already) (the last one is needed for coalescing)
+  // and also for the last one (if not set already) (the last one is needed for coalescing and for large alignments)
   // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543)
-  size_t slice_last_index = slice_index + slice_count - 1;
-  if (slice_last_index >= segment->slice_entries) {
-    slice_last_index = segment->slice_entries;
-  }
-  mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_last_index];
-  if (last <= mi_segment_slices_end(segment) && last >= slice) {
-    last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_last_index - slice_index));
+  mi_slice_t* last = slice + slice_count - 1;
+  mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment);
+  if (last > end) last = end;
+  if (last > slice) {
+    last->slice_offset = (uint32_t)(sizeof(mi_slice_t) * (last - slice));
     last->slice_count = 0;
     last->xblock_size = 1;
   }
diff --git a/test/test-api.c b/test/test-api.c
index 312b3f1b..01ef98bd 100644
--- a/test/test-api.c
+++ b/test/test-api.c
@@ -177,10 +177,11 @@ int main(void) {
   };
   CHECK_BODY("malloc-aligned9") {
     bool ok = true;
-    for (int i = 0; i < 5 && ok; i++) {
+    for (int i = 0; i < 8 && ok; i++) {
       int n = (1 << i);
-      void* p = mi_malloc_aligned( 2*n*MI_ALIGNMENT_MAX, n*MI_ALIGNMENT_MAX);
-      ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0;
+      size_t align = n * (MI_ALIGNMENT_MAX / 8);
+      void* p = mi_malloc_aligned( 2*align, align);
+      ok =
((uintptr_t)p % align) == 0; mi_free(p); } result = ok; From 1632dd73c9254322f3d65f696195b9b7005ac445 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 21:03:23 -0800 Subject: [PATCH 10/88] remove superfluous asserts --- include/mimalloc-internal.h | 2 +- src/alloc-aligned.c | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bb4f50d3..192e14da 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -479,7 +479,7 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { return start; } -// Get the page containing the pointer +// Get the page containing the pointer (performance critical as it is called in mi_free) static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { mi_assert_internal(p > (void*)segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 06ed5272..66a26b49 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -43,12 +43,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* } oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block - if (p == NULL) return NULL; - const uintptr_t adjustx = alignment - (((uintptr_t)p + offset) & align_mask); - const mi_page_t* page = _mi_ptr_page(p); - const size_t bsize = mi_page_usable_block_size(page); - mi_assert_internal(bsize >= adjustx + size); - mi_assert_internal(true); + if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -63,8 +58,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } - // mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); #if MI_TRACK_ENABLED if (p != aligned_p) { From 96f1574faf9739ee6eca5a55df9370767a094247 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 10:51:15 -0800 Subject: [PATCH 11/88] fix huge page aligned allocation size in secure mode --- ide/vs2022/mimalloc.vcxproj | 2 +- src/segment.c | 24 ++++++++++++++++-------- test/test-api.c | 2 +- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 616c226c..9811aa55 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=4;%(PreprocessorDefinitions); + MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/src/segment.c b/src/segment.c index 0a5ac3c7..22b9ccd0 100644 --- a/src/segment.c +++ b/src/segment.c @@ -336,12 +336,14 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, siz size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; - + if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) - guardsize = page_size; - required = _mi_align_up(required, page_size); + guardsize = page_size; + if (required > 0) { + required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size; + } } if (pre_size != NULL) *pre_size = isize; @@ -802,21 +804,27 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, siz size_t memid = 0; size_t align_offset = 0; size_t alignment = MI_SEGMENT_SIZE; - size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; - + if (page_alignment > 0) { mi_assert_internal(huge_page != NULL); mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN); alignment = page_alignment; const size_t info_size = info_slices * MI_SEGMENT_SLICE_SIZE; align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN ); - segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); - segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; + const size_t extra = align_offset - info_size; + // recalculate due to potential guard pages + segment_slices = mi_segment_calculate_slices(required + extra, &pre_size, &info_slices); + //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); + //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; } - else { + const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; + + // get from cache + if (page_alignment == 0) { segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); } + // get from OS if (segment==NULL) { segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate diff --git a/test/test-api.c b/test/test-api.c index 
01ef98bd..e7f3a4ed 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -149,7 +149,7 @@ int main(void) { for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { - ps[i] = mi_malloc_aligned(align*13 /*size*/, align); + ps[i] = mi_malloc_aligned(align*5 /*size*/, align); if (ps[i] == NULL || (uintptr_t)(ps[i]) % align != 0) { ok = false; } From 711aad7a7538ff6f765f732fcc1fc0000a4b1ef7 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 11:13:29 -0800 Subject: [PATCH 12/88] refactor arena allocation --- src/arena.c | 120 +++++++++++++++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 53 deletions(-) diff --git a/src/arena.c b/src/arena.c index 5aef95f7..0cc569ab 100644 --- a/src/arena.c +++ b/src/arena.c @@ -45,7 +45,6 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); Arena allocation ----------------------------------------------------------- */ -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE #define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 32MiB #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB #define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) @@ -190,8 +189,63 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } +// allocate from an arena with fallback to the OS +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, + bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) +{ + MI_UNUSED_RELEASE(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + const size_t bcount = mi_block_count_of_size(size); + if mi_likely(max_arena == 0) return NULL; + mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); + + size_t arena_index = mi_arena_id_index(req_arena_id); + if (arena_index < MI_MAX_ARENAS) { + // try a specific arena if requested + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + if ((arena != NULL) && + (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; + } + } + else { + // try numa affine allocation + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; // end reached + if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; + } + } + + // try from another numa node instead.. + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; // end reached + if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! 
+ (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; + } + } + } + return NULL; +} + void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t arena_id, size_t* memid, mi_os_tld_t* tld) + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); @@ -199,71 +253,31 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset *is_zero = false; *is_pinned = false; - // try to allocate in an arena if the alignment is small enough - // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && align_offset == 0 && - size >= MI_ARENA_MIN_OBJ_SIZE && - mi_atomic_load_relaxed(&mi_arena_count) > 0) - { - const size_t bcount = mi_block_count_of_size(size); - const int numa_node = _mi_os_numa_node(tld); // current numa node - mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + bool default_large = false; + if (large == NULL) large = &default_large; // ensure `large != NULL` + const int numa_node = _mi_os_numa_node(tld); // current numa node - // try specific arena if so requested - size_t arena_index = mi_arena_id_index(arena_id); - if (arena_index < MI_MAX_ARENAS) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); - if ((arena != NULL) && - (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } - } - - // try numa affine allocation - for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } - } - // try from another numa node instead.. - for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! 
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } - } + // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { + void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + if (p != NULL) return p; } // finally, fall back to the OS - if (mi_option_is_enabled(mi_option_limit_os_alloc)) { + if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } *is_zero = true; *memid = MI_MEMID_OS; void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); - if (p != NULL) *is_pinned = *large; + if (p != NULL) { *is_pinned = *large; } return p; } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t arena_id, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, arena_id, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); } From 1f12c3dd12646e24b1c378ff2e0b8542cb0e574f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 11:13:42 -0800 Subject: [PATCH 13/88] remove MI_ALIGNMENT_MAX and add comments --- include/mimalloc-types.h | 9 +++++---- include/mimalloc.h | 1 - src/alloc-aligned.c | 4 ++-- src/os.c | 13 +++++++------ src/segment.c | 4 +--- test/test-api.c | 2 +- 6 files changed, 16 insertions(+), 17 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 79d04d2d..e54c4171 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -139,7 +139,8 @@ typedef int32_t mi_ssize_t; // Derived constants #define MI_SEGMENT_SIZE (MI_ZU(1)<= 655360) #error "mimalloc internal: define more bins" #endif -#if (MI_ALIGNMENT_MAX > MI_SEGMENT_SIZE/2) -#error "mimalloc internal: the max aligned boundary is too large for the segment size" -#endif // Used as a special value to encode block sizes in 32 bits. #define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) +// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments +#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) + // ------------------------------------------------------ // Mimalloc pages contain allocated blocks diff --git a/include/mimalloc.h b/include/mimalloc.h index 17fd1c60..34644183 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -166,7 +166,6 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
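// For example (illustrative): mi_malloc_aligned(240, 16) takes the size first,
// while the C11 counterpart aligned_alloc(16, 240) takes the alignment first.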
// ------------------------------------------------------------------------------------- -#define MI_ALIGNMENT_MAX (2*1024*1024UL) // maximum supported alignment is 1MiB mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 3ce01f5c..86b0112b 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -36,9 +36,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) if mi_unlikely(offset != 0) { // todo: cannot support offset alignment for very large alignments yet -#if MI_DEBUG > 0 + #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); -#endif + #endif return NULL; } oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); diff --git a/src/os.c b/src/os.c index 57b34a2c..c2d53e5b 100644 --- a/src/os.c +++ b/src/os.c @@ -842,29 +842,30 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar /* ----------------------------------------------------------- OS aligned allocation with an offset. This is used - for large alignments > MI_SEGMENT_SIZE so we can align - the first page at an offset from the start of the segment. - As we may need to overallocate, we need to free such pointers - using `mi_free_aligned` to use the actual start of the - memory region. + for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc + page where the object can be aligned at an offset from the start of the segment. + As we may need to overallocate, we need to free such pointers using `mi_free_aligned` + to use the actual start of the memory region. 
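+   A worked example (illustrative numbers, not from this patch): with size = 1MiB,
+   alignment = 64MiB and offset = 1MiB, extra = align_up(offset, alignment) - offset
+   = 63MiB; we allocate size + extra bytes aligned to 64MiB and return start + extra,
+   so that (returned pointer + offset) falls exactly on a 64MiB boundary.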
----------------------------------------------------------- */
-
 void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) {
   mi_assert(offset <= MI_SEGMENT_SIZE);
   mi_assert(offset <= size);
   mi_assert((alignment % _mi_os_page_size()) == 0);
   if (offset > MI_SEGMENT_SIZE) return NULL;
   if (offset == 0) {
+    // regular aligned allocation
     return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats);
   }
   else {
+    // overallocate to align at an offset
     const size_t extra = _mi_align_up(offset, alignment) - offset;
     const size_t oversize = size + extra;
     void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats);
     if (start == NULL) return NULL;
     void* p = (uint8_t*)start + extra;
     mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
+    // decommit the overallocation at the start
     if (commit && extra > _mi_os_page_size()) {
       _mi_os_decommit(start, extra, tld_stats);
     }
diff --git a/src/segment.c b/src/segment.c
index 5274aa1d..798aa756 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -1317,9 +1317,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
     mi_assert_internal(_mi_is_power_of_two(page_alignment));
     mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE);
     //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0);
-    if (page_alignment < MI_SEGMENT_SIZE) {
-      page_alignment = MI_SEGMENT_SIZE;
-    }
+    if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; }
     page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld);
   }
   else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
diff --git a/test/test-api.c b/test/test-api.c
index 312b3f1b..a16ef381 100644
--- a/test/test-api.c
+++ b/test/test-api.c
@@ -34,7 +34,7 @@ we therefore test the API over various inputs. Please add more tests :-)
 #include "mimalloc.h"
 // #include "mimalloc-internal.h"
-#include "mimalloc-types.h" // for MI_DEBUG
+#include "mimalloc-types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX
 #include "testhelper.h"

From 2daec6c72f62597b8f87e28caecba028102c053a Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 7 Nov 2022 11:23:04 -0800
Subject: [PATCH 14/88] add more comments

---
 include/mimalloc-internal.h | 3 +++
 src/alloc-aligned.c | 2 ++
 src/page.c | 3 +++
 src/region.c | 2 --
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 4620fb72..53c0340a 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -425,6 +425,9 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {
 }

 // Segment that contains the pointer
+// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
+// and we need to align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
+// therefore we align one byte before `p`.
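+// (illustrative sketch, assuming a 32MiB MI_SEGMENT_SIZE: a huge block placed
+// exactly at p = 2*MI_SEGMENT_SIZE has (p & ~MI_SEGMENT_MASK) == p itself,
+// while ((p - 1) & ~MI_SEGMENT_MASK) yields the boundary just before it where
+// the segment metadata lives; for interior pointers both forms agree)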
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
   mi_assert_internal(p != NULL);
   return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 86b0112b..db80baee 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -34,6 +34,8 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
   size_t oversize;
   if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
     // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
+    // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
+    // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
     if mi_unlikely(offset != 0) {
       // todo: cannot support offset alignment for very large alignments yet
       #if MI_DEBUG > 0
       _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
       #endif
       return NULL;
     }
     oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
diff --git a/src/page.c b/src/page.c
index e359d5bb..7c3a30a8 100644
--- a/src/page.c
+++ b/src/page.c
@@ -795,6 +795,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
 // Because huge pages contain just one block, and the segment contains
 // just that page, we always treat them as abandoned and any thread
 // that frees the block can free the whole page and segment directly.
+// Huge pages are also used if the requested alignment is very large (> MI_ALIGNMENT_MAX).
 static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
   size_t block_size = _mi_os_good_alloc_size(size);
   mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
@@ -844,6 +845,8 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme

 // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
 // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
+// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
+// very large requested alignments in which case we use a huge segment.
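+// (for instance, a hypothetical mi_malloc_aligned(1*MI_MiB, 64*MI_MiB) request
+// ends up here with huge_alignment = 64MiB, so the single block of the huge
+// page can be placed at the required alignment inside its oversized segment)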
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { mi_assert_internal(heap != NULL); diff --git a/src/region.c b/src/region.c index ea376aa4..f069502f 100644 --- a/src/region.c +++ b/src/region.c @@ -65,8 +65,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offs #error "define the maximum heap space allowed for regions on this platform" #endif -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE - #define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS #define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) #define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) From 312ce6f916ce758db2a2a1364b2d9b6480c08ee5 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 11:36:44 -0800 Subject: [PATCH 15/88] bump version to v1.7.8 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index d1191e69..97b3f575 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 7) -set(mi_version_patch 7) +set(mi_version_patch 8) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 34644183..1b7d6dd1 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 177 // major + 2 digits minor +#define MI_MALLOC_VERSION 178 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 3e1d800e9bad8cdd572b11e6b4aedc1d8318fa07 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 14:47:53 -0800 Subject: [PATCH 16/88] potential fix for windows static linking with thread creation in dll's --- include/mimalloc-internal.h | 2 ++ include/mimalloc-types.h | 1 + src/init.c | 18 +++++++++++++----- src/random.c | 26 ++++++++++++++++++++++---- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 53c0340a..ea104f3d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -60,6 +60,8 @@ void _mi_error_message(int err, const char* fmt, ...); // random.c void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_init_weak(mi_random_ctx_t* ctx); +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); uintptr_t _mi_random_next(mi_random_ctx_t* ctx); uintptr_t _mi_heap_random_next(mi_heap_t* heap); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index e54c4171..760d57d2 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -357,6 +357,7 @@ typedef struct mi_random_cxt_s { uint32_t input[16]; uint32_t output[16]; int output_available; + bool weak; } mi_random_ctx_t; diff --git a/src/init.c b/src/init.c index fe078145..43cb0560 100644 --- a/src/init.c +++ b/src/init.c @@ -142,8 +142,13 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; static void mi_heap_main_init(void) { if (_mi_heap_main.cookie == 0) { _mi_heap_main.thread_id = _mi_thread_id(); - _mi_heap_main.cookie = _mi_os_random_weak((uintptr_t)&mi_heap_main_init); - 
_mi_random_init(&_mi_heap_main.random); + _mi_heap_main.cookie = 1; + #if defined(_WIN32) && !defined(MI_SHARED_LIB) + _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking + #else + _mi_random_init(&_mi_heap_main.random); + #endif + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); } @@ -502,12 +507,13 @@ static void mi_process_load(void) { MI_UNUSED(dummy); #endif os_preloading = false; + mi_assert_internal(_mi_is_main_thread()); #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) atexit(&mi_process_done); #endif _mi_options_init(); - mi_process_init(); - //mi_stats_reset();- + mi_process_setup_auto_thread_done(); + mi_process_init(); if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); // show message from the redirector (if present) @@ -516,6 +522,9 @@ static void mi_process_load(void) { if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,NULL,msg); } + + // reseed random + _mi_random_reinit_if_weak(&_mi_heap_main.random); } #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) @@ -542,7 +551,6 @@ void mi_process_init(void) mi_attr_noexcept { _mi_process_is_initialized = true; mi_process_setup_auto_thread_done(); - mi_detect_cpu_features(); _mi_os_init(); mi_heap_main_init(); diff --git a/src/random.c b/src/random.c index a5f5e6b8..60d64ef7 100644 --- a/src/random.c +++ b/src/random.c @@ -168,7 +168,7 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim #if defined(_WIN32) -#if defined(MI_USE_RTLGENRANDOM) || defined(__cplusplus) +#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) // We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using // dynamic overriding, we observed it can raise an exception when compiled with C++, and // sometimes deadlocks when also running under the VS debugger. @@ -303,23 +303,41 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { return x; } -void _mi_random_init(mi_random_ctx_t* ctx) { +static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { uint8_t key[32]; - if (!os_random_buf(key, sizeof(key))) { + if (use_weak || !os_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time #if !defined(__wasi__) - _mi_warning_message("unable to use secure randomness\n"); + if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); } #endif uintptr_t x = _mi_os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. 
x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } + ctx->weak = true; + } + else { + ctx->weak = false; } chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); } +void _mi_random_init(mi_random_ctx_t* ctx) { + mi_random_init_ex(ctx, false); +} + +void _mi_random_init_weak(mi_random_ctx_t * ctx) { + mi_random_init_ex(ctx, true); +} + +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx) { + if (ctx->weak) { + _mi_random_init(ctx); + } +} + /* -------------------------------------------------------- test vectors from ----------------------------------------------------------- */ From 841172bd7ddcbe68191c686291e194a0d03cf04f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 14:51:07 -0800 Subject: [PATCH 17/88] fix link error on windows --- src/alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/alloc.c b/src/alloc.c index d10bd586..a38da2e5 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -584,6 +584,7 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void* _mi_externs[] = { (void*)&_mi_page_malloc, (void*)&_mi_heap_malloc_zero, + (void*)&_mi_heap_malloc_zero_ex, (void*)&mi_malloc, (void*)&mi_malloc_small, (void*)&mi_zalloc_small, From 29405c7d70c931c890f53f3ad80243ba83220768 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 14:53:27 -0800 Subject: [PATCH 18/88] fix initializer --- src/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 38a38913..ca48fb10 100644 --- a/src/init.c +++ b/src/init.c @@ -111,7 +111,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { 0, // cookie 0, // arena id { 0, 0 }, // keys - { {0}, {0}, 0 }, + { {0}, {0}, 0, true }, 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next @@ -152,7 +152,7 @@ mi_heap_t _mi_heap_main = { 0, // initial cookie 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) - { {0x846ca68b}, {0}, 0 }, // random + { {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap From 56ab9ee06a896171f057ad51f38151bc39ba2509 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 14:55:36 -0800 Subject: [PATCH 19/88] update azure pipeline to ubuntu 22.04 --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 6e99c9d9..57cabbef 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -55,7 +55,7 @@ jobs: displayName: Linux pool: vmImage: - ubuntu-18.04 + ubuntu-22.04 strategy: matrix: Debug: From 27fc2cf17e40fd28ba952c1dfad5d17abdf1cb56 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 14:57:07 -0800 Subject: [PATCH 20/88] fix random initializer --- src/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 43cb0560..78bc2bad 100644 --- a/src/init.c +++ b/src/init.c @@ -95,7 +95,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { 0, // tid 0, // cookie { 0, 0 }, // keys - { {0}, {0}, 0 }, + { {0}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next @@ -127,7 +127,7 @@ mi_heap_t _mi_heap_main = { 0, // thread id 0, // initial cookie { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) 
- { {0x846ca68b}, {0}, 0 }, // random + { {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap From 2479d168adb829f2f7edfa9562f53dddd7d0c7a7 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 16:41:40 -0800 Subject: [PATCH 21/88] decommit unused prefix of large aligned blocks --- include/mimalloc-internal.h | 5 +++++ src/alloc-aligned.c | 15 ++++++++++++-- src/alloc.c | 41 ++++++++++++++++++++++--------------- src/segment.c | 14 +++++++++---- test/test-api.c | 16 ++++++++++----- 5 files changed, 63 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ea104f3d..01df65dd 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -94,6 +94,7 @@ void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_off bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); @@ -485,6 +486,10 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } } +static inline bool mi_page_is_huge(const mi_page_t* page) { + return (_mi_page_segment(page)->page_kind == MI_PAGE_HUGE); +} + // Get the usable block size of a page without fixed padding. // This may still include internal padding due to alignment and rounding up size classes. static inline size_t mi_page_usable_block_size(const mi_page_t* page) { diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index db80baee..ffc51edc 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -44,8 +44,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* return NULL; } oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); - p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block - if (p == NULL) return NULL; + p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block + // zero afterwards as only the area from the aligned_p may be committed! 
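+      // (note: the unused prefix before the aligned pointer may be decommitted
+      // by the segment layer (see the segment.c hunk below), so only memory
+      // from the aligned pointer onward is guaranteed to be committed)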
+ if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -63,6 +64,16 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + + // now zero the block if needed + if (zero && alignment > MI_ALIGNMENT_MAX) { + const ptrdiff_t diff = (uint8_t*)aligned_p - (uint8_t*)p; + ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE; + #if MI_PADDING + zsize -= MI_MAX_ALIGN_SIZE; + #endif + if (zsize > 0) { _mi_memzero(aligned_p, zsize); } + } #if MI_TRACK_ENABLED if (p != aligned_p) { diff --git a/src/alloc.c b/src/alloc.c index a38da2e5..cfc623c1 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -51,7 +51,9 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED - if (!page->is_zero && !zero) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } + if (!page->is_zero && !zero && !mi_page_is_huge(page)) { + memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); + } #elif (MI_SECURE!=0) if (!zero) { block->next = 0; } // don't leak internal data #endif @@ -77,9 +79,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz #endif padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->delta = (uint32_t)(delta); - uint8_t* fill = (uint8_t*)padding - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes - for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)padding - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + } #endif return block; @@ -250,17 +254,19 @@ static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, si if (!ok) return false; mi_assert_internal(bsize >= delta); *size = bsize - delta; - uint8_t* fill = (uint8_t*)block + bsize - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes - mi_track_mem_defined(fill,maxpad); - for (size_t i = 0; i < maxpad; i++) { - if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - delta + i; - ok = false; - break; + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + mi_track_mem_defined(fill, maxpad); + for (size_t i = 0; i < maxpad; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - delta + i; + ok = false; + break; + } } + mi_track_mem_noaccess(fill, maxpad); } - mi_track_mem_noaccess(fill,maxpad); return ok; } @@ -361,10 +367,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // that is safe as these are constant and the page won't be freed (as the block is not freed yet). 
mi_check_padding(page, block); mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); - #endif - + // huge page segments are always abandoned and can be freed immediately mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { @@ -373,6 +376,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc return; } + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + #endif + // Try to put the block on either the page-local thread free list, or the heap delayed free list. mi_thread_free_t tfreex; bool use_delayed; diff --git a/src/segment.c b/src/segment.c index 798aa756..b922a50c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1265,10 +1265,16 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, if (page_alignment > 0) { size_t psize; size_t pre_size; - void* p = _mi_segment_page_start(segment, page, 0, &psize, &pre_size); - void* aligned_p = (void*)_mi_align_up((uintptr_t)p, page_alignment); - mi_assert_internal(page_alignment == 0 || _mi_is_aligned(aligned_p, page_alignment)); - mi_assert_internal(page_alignment == 0 || psize - ((uint8_t*)aligned_p - (uint8_t*)p) >= size); + uint8_t* p = (uint8_t*)_mi_segment_page_start(segment, page, 0, &psize, &pre_size); + uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)p, page_alignment); + mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(psize - (aligned_p - p) >= size); + if (!segment->mem_is_pinned && page->is_committed) { + // decommit the part of the page that is unused; this can be quite large (close to MI_SEGMENT_SIZE) + uint8_t* decommit_start = p + sizeof(mi_block_t); // for the free list + ptrdiff_t decommit_size = aligned_p - decommit_start; + _mi_mem_decommit(decommit_start, decommit_size, os_tld); + } } #endif // for huge pages we initialize the xblock_size as we may diff --git a/test/test-api.c b/test/test-api.c index a16ef381..65578287 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -177,11 +177,17 @@ int main(void) { }; CHECK_BODY("malloc-aligned9") { bool ok = true; - for (int i = 0; i < 5 && ok; i++) { - int n = (1 << i); - void* p = mi_malloc_aligned( 2*n*MI_ALIGNMENT_MAX, n*MI_ALIGNMENT_MAX); - ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; - mi_free(p); + void* p[8]; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_ALIGNMENT_MAX, MI_ALIGNMENT_MAX + 1, 2 * MI_ALIGNMENT_MAX, 8 * MI_ALIGNMENT_MAX, 0 }; + for (int i = 0; i < 28 && ok; i++) { + int align = (1 << i); + for (int j = 0; j < 8 && ok; j++) { + p[j] = mi_zalloc_aligned(sizes[j], align); + ok = ((uintptr_t)p[j] % align) == 0; + } + for (int j = 0; j < 8; j++) { + mi_free(p[j]); + } } result = ok; }; From 67439bb4e5b00f1144bf7516c75649fd29d5dd3e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 17:12:14 -0800 Subject: [PATCH 22/88] add NULL check in _mi_segment_of --- src/segment-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 34c8b029..436ce2bf 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -285,8 +285,9 @@ void _mi_segment_map_freed_at(const mi_segment_t* 
segment) { // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. static mi_segment_t* _mi_segment_of(const void* p) { + if (p == NULL) return NULL; mi_segment_t* segment = _mi_ptr_segment(p); - if (segment == NULL) return NULL; + mi_assert_internal(segment != NULL); size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge From ba8c0f890314d80d830599f686eaa63aafcee880 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 17:21:03 -0800 Subject: [PATCH 23/88] avoid warning for large aligned blocks on linux --- src/alloc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 2b53ac22..f951370a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -488,10 +488,16 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms #if (MI_DEBUG>0) if mi_unlikely(!mi_is_in_heap_region(p)) { - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + #if (MI_INTPTR_SIZE == 8 && defined(__linux__)) + if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640) + #else + { + #endif + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + } } } #endif From b940543cd582514b5f53bc6c317d5fcfd7f28f55 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 17 Nov 2022 18:57:45 -0800 Subject: [PATCH 24/88] experiment with smaller segment size (32MiB) and finer minimal commit (1MiB) --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 016bb684..3a4f8f6b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,7 +132,7 @@ typedef int32_t mi_ssize_t; // Main tuning parameters for segment and page sizes // Sizes for 64-bit (usually divide by two for 32-bit) -#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB @@ -324,7 +324,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (MI_MiB) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS From 82a765a255b028cf57b9ddcf95a125cdf821da87 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 18 Nov 2022 09:38:01 -0800 Subject: [PATCH 25/88] experiment with 32KiB slices and increased MI_MIN_EXTEND --- include/mimalloc-types.h | 8 ++++---- src/page.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 3a4f8f6b..88905bdc 100644 --- 
a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -132,16 +132,16 @@ typedef int32_t mi_ssize_t;
 // Main tuning parameters for segment and page sizes
 // Sizes for 64-bit (usually divide by two for 32-bit)
-#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit)
+#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 32KiB (16KiB on 32-bit)
 #if MI_INTPTR_SIZE > 4
-#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB
+#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 32MiB
 #else
 #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit
 #endif

 #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB
-#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
+#define MI_MEDIUM_PAGE_SHIFT ( 4 + MI_SMALL_PAGE_SHIFT) // 512KiB

 // Derived constants
@@ -324,7 +324,7 @@ typedef enum mi_segment_kind_e {
 // is still tracked in fine-grained MI_COMMIT_SIZE chunks)
 // ------------------------------------------------------

-#define MI_MINIMAL_COMMIT_SIZE (MI_MiB)
+#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB)
 #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB
 #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
 #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS
diff --git a/src/page.c b/src/page.c
index 2fa03606..cb957bf7 100644
--- a/src/page.c
+++ b/src/page.c
@@ -408,7 +408,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
 }

 // Retire parameters
-#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX
+#define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX)
 #define MI_RETIRE_CYCLES (8)

 // Retire a page with no more used blocks
@@ -579,7 +579,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
 #if (MI_SECURE>0)
 #define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many
 #else
-#define MI_MIN_EXTEND (1)
+#define MI_MIN_EXTEND (4)
 #endif

 // Extend the capacity (up to reserved) by initializing a free list

From 8834e128e8e13273c0288fb203606f9ed25a6c6f Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Fri, 18 Nov 2022 10:13:51 -0800
Subject: [PATCH 26/88] improve mi_free codegen

---
 include/mimalloc-internal.h | 1 +
 src/alloc.c | 64 +++++++++++++++++++++----------------
 2 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 01df65dd..991e8a3e 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -154,6 +154,7 @@ void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, siz
 void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
 bool _mi_free_delayed_block(mi_block_t* block);
+void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration

 #if MI_DEBUG>1
 bool _mi_page_is_valid(mi_page_t* page);
diff --git a/src/alloc.c b/src/alloc.c
index cfc623c1..38503dbb 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -456,12 +456,12 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
 }

-static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) mi_attr_noexcept {
-  mi_page_t* const page = _mi_segment_page_of(segment, p);
+void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
+  //mi_page_t* const page =
_mi_segment_page_of(segment, p); mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); mi_stat_free(page, block); // stat_free may access the padding mi_track_free(p); - _mi_free_block(page, local, block); + _mi_free_block(page, is_local, block); } // Get the segment data belonging to a pointer @@ -470,6 +470,8 @@ static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool l static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) { MI_UNUSED(msg); + mi_assert(p != NULL); + #if (MI_DEBUG>0) if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) { _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); @@ -477,8 +479,8 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms } #endif - if mi_unlikely(p == NULL) return NULL; mi_segment_t* const segment = _mi_ptr_segment(p); + mi_assert_internal(segment != NULL); #if (MI_DEBUG>0) if mi_unlikely(!mi_is_in_heap_region(p)) { @@ -495,38 +497,44 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms return NULL; } #endif + return segment; } // Free a block +// fast path written carefully to prevent spilling on the stack void mi_free(void* p) mi_attr_noexcept { + if mi_unlikely(p == NULL) return; mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - if mi_unlikely(segment == NULL) return; - - mi_threadid_t tid = _mi_thread_id(); - mi_page_t* const page = _mi_segment_page_of(segment, p); - mi_block_t* const block = (mi_block_t*)p; - - if mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0) { // the thread id matches and it is not a full page, nor has aligned blocks - // local, and not full or aligned - if mi_unlikely(mi_check_is_double_free(page,block)) return; - mi_check_padding(page, block); - mi_stat_free(page, block); - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - #endif - mi_track_free(p); - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) - _mi_page_retire(page); - } + const bool is_local= (_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + mi_page_t* const page = _mi_segment_page_of(segment, p); + + if mi_likely(is_local) { // thread-local free? 
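+    // (sketch of the intent: the full and has_aligned bits are tested together
+    // as the single full_aligned field, so the common case pays one predictable
+    // branch before pushing the block onto page->local_free below)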
+ if mi_likely(page->flags.full_aligned == 0) // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) + { + mi_block_t* const block = (mi_block_t*)p; + if mi_unlikely(mi_check_is_double_free(page, block)) return; + mi_check_padding(page, block); + mi_stat_free(page, block); + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif + mi_track_free(p); + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) + _mi_page_retire(page); + } + } + else { + // page is full or contains (inner) aligned blocks; use generic path + _mi_free_generic(segment, page, true, p); + } } else { - // non-local, aligned blocks, or a full page; use the more generic path - // note: recalc page in generic to improve code generation - mi_free_generic(segment, tid == segment->thread_id, p); + // not thread-local; use generic path + _mi_free_generic(segment, page, false, p); } } @@ -565,8 +573,8 @@ mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t } static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { + if (p == NULL) return 0; const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); - if (segment==NULL) return 0; // also returns 0 if `p == NULL` const mi_page_t* const page = _mi_segment_page_of(segment, p); if mi_likely(!mi_page_has_aligned(page)) { const mi_block_t* block = (const mi_block_t*)p; From 18cf94dff63b2304174dece26bc49c9d568b540d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 18 Nov 2022 10:14:37 -0800 Subject: [PATCH 27/88] move keys field in the page struct to keep offsets in the fast path the same for different build variants --- include/mimalloc-types.h | 7 ++++--- src/init.c | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 760d57d2..5c64be4e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -271,13 +271,14 @@ typedef struct mi_page_s { uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + #ifdef MI_ENCODE_FREELIST uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif - uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - uint32_t xblock_size; // size available in each block (always `>0`) - mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(uintptr_t) xheap; diff --git a/src/init.c b/src/init.c index 78bc2bad..8a4316e5 100644 --- a/src/init.c +++ b/src/init.c @@ -19,12 +19,12 @@ const mi_page_t _mi_page_empty = { false, // is_zero 0, // retire_expire NULL, // free - #if MI_ENCODE_FREELIST - { 0, 0 }, - #endif 0, // used 0, // xblock_size NULL, // local_free + #if MI_ENCODE_FREELIST + { 0, 0 }, + #endif MI_ATOMIC_VAR_INIT(0), // xthread_free 
MI_ATOMIC_VAR_INIT(0), // xheap NULL, NULL From 35d332141dea7e64163d75b3f7c9739d753a6412 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 18 Nov 2022 11:00:23 -0800 Subject: [PATCH 28/88] experimental destroy_on_exit option for statically linked mimalloc in a dynamically unloaded DLL --- include/mimalloc-internal.h | 1 + include/mimalloc.h | 1 + src/heap.c | 15 ++++++++++++++- src/init.c | 7 ++++++- src/options.c | 3 ++- test/main-override-static.c | 2 +- 6 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 991e8a3e..0ecc3057 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -139,6 +139,7 @@ uint8_t _mi_bin(size_t size); // for stats void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); +void _mi_heap_destroy_all(void); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/include/mimalloc.h b/include/mimalloc.h index 1b7d6dd1..1fc10b2a 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -335,6 +335,7 @@ typedef enum mi_option_e { mi_option_max_errors, mi_option_max_warnings, mi_option_max_segment_reclaim, + mi_option_destroy_on_exit, _mi_option_last } mi_option_t; diff --git a/src/heap.c b/src/heap.c index c36a9616..3c73d935 100644 --- a/src/heap.c +++ b/src/heap.c @@ -327,7 +327,20 @@ void mi_heap_destroy(mi_heap_t* heap) { } } - +void _mi_heap_destroy_all(void) { + mi_heap_t* bheap = mi_heap_get_backing(); + mi_heap_t* curr = bheap->tld->heaps; + while (curr != NULL) { + mi_heap_t* next = curr->next; + if (curr->no_reclaim) { + mi_heap_destroy(curr); + } + else { + _mi_heap_destroy_pages(curr); + } + curr = next; + } +} /* ----------------------------------------------------------- Safe Heap delete diff --git a/src/init.c b/src/init.c index 8a4316e5..750693c0 100644 --- a/src/init.c +++ b/src/init.c @@ -598,7 +598,7 @@ static void mi_cdecl mi_process_done(void) { #if defined(_WIN32) && !defined(MI_SHARED_LIB) FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208 #endif - + #ifndef MI_SKIP_COLLECT_ON_EXIT #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) // free all memory if possible on process exit. This is not needed for a stand-alone process @@ -608,6 +608,11 @@ static void mi_cdecl mi_process_done(void) { #endif #endif + if (mi_option_is_enabled(mi_option_destroy_on_exit)) { + _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) + _mi_mem_collect(&_mi_heap_main_get()->tld->os); // release all regions + } + if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { mi_stats_print(NULL); } diff --git a/src/options.c b/src/options.c index c5d56380..4fa5d5f8 100644 --- a/src/options.c +++ b/src/options.c @@ -93,7 +93,8 @@ static mi_option_desc_t options[_mi_option_last] = { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output - { 8, UNINIT, MI_OPTION(max_segment_reclaim)} // max. number of segment reclaims from the abandoned segments per try. + { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. 
+  { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointers or after-exit frees!
};

static void mi_option_init(mi_option_desc_t* desc);
diff --git a/test/main-override-static.c b/test/main-override-static.c
index a5088d3a..d216a330 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -32,7 +32,7 @@ int main() {
   // invalid_free();
   // test_reserved();
   // negative_stat();
-  test_heap_walk();
+  // test_heap_walk();
   // alloc_huge();

   void* p1 = malloc(78);

From 1a7f6f376d28571432ee4d3a498680da3c9dda89 Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 21 Nov 2022 10:22:50 -0800
Subject: [PATCH 29/88] move threadid field

---
 include/mimalloc-types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index cf142748..d44ecc18 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -374,8 +374,9 @@ typedef struct mi_segment_s {

   // layout like this to optimize access in `mi_free`
   mi_segment_kind_t kind;
-  _Atomic(mi_threadid_t) thread_id;   // unique id of the thread owning this segment
   size_t            slice_entries;    // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
+  _Atomic(mi_threadid_t) thread_id;   // unique id of the thread owning this segment
+
   mi_slice_t        slices[MI_SLICES_PER_SEGMENT+1];  // one more for huge blocks with large alignment
 } mi_segment_t;

From e6ab602ca1486424868b23f3f889c3f9e37b19ef Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 21 Nov 2022 10:24:02 -0800
Subject: [PATCH 30/88] add comment

---
 src/init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/init.c b/src/init.c
index 750693c0..3f71fa01 100644
--- a/src/init.c
+++ b/src/init.c
@@ -608,6 +608,9 @@ static void mi_cdecl mi_process_done(void) {
   #endif
   #endif

+  // Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free
+  // since after process_done there might still be other code running that calls `free` (like at_exit routines,
+  // or C-runtime termination code).
   if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
     _mi_heap_destroy_all();     // forcefully release all memory held by all heaps (of this thread only!)
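    // (this option is off by default; as with other mimalloc options it can be
    //  enabled programmatically via `mi_option_enable(mi_option_destroy_on_exit)`
    //  or through the corresponding `MIMALLOC_DESTROY_ON_EXIT` environment variable)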
_mi_mem_collect(&_mi_heap_main_get()->tld->os); // release all regions From 3ccf849c1a901f3c6fd11ad16c089d60ace4580a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 15:02:41 -0800 Subject: [PATCH 31/88] more refined decommit extend delay --- src/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 3423bd53..25e63904 100644 --- a/src/segment.c +++ b/src/segment.c @@ -540,8 +540,12 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } else if (segment->decommit_expire <= now) { // previous decommit mask already expired - // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { + mi_segment_delayed_decommit(segment, true, stats); + } + else { + segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + } } else { // previous decommit mask is not yet expired, increase the expiration by a bit. From c0077471695338fb5a233a42341a052133ef0180 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 15:03:15 -0800 Subject: [PATCH 32/88] back to 64k pages but 32MiB segments and a 1MiB minimal commit size --- include/mimalloc-types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d44ecc18..399001c6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,10 +132,10 @@ typedef int32_t mi_ssize_t; // Main tuning parameters for segment and page sizes // Sizes for 64-bit (usually divide by two for 32-bit) -#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 32KiB (32KiB on 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 32MiB +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB #else #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit #endif @@ -325,7 +325,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS From 83c027c4bf591b3154a6bab02dc343ff99837387 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 18:56:56 -0800 Subject: [PATCH 33/88] fix medium page size to 512k --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 399001c6..9def491e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -141,7 +141,7 @@ typedef int32_t mi_ssize_t; #endif #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB -#define MI_MEDIUM_PAGE_SHIFT ( 4 + MI_SMALL_PAGE_SHIFT) // 512KiB +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB // Derived constants From 631f22fbda236b0bc567689274cf75a4f83aac91 Mon Sep 
17 00:00:00 2001 From: Daan Date: Tue, 22 Nov 2022 10:54:40 -0800 Subject: [PATCH 34/88] change library linkage to PRIVATE (issue #646) --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bc0f76b..4eebf2ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -340,7 +340,7 @@ if(MI_BUILD_SHARED) set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} ) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) target_compile_options(mimalloc PRIVATE ${mi_cflags}) - target_link_libraries(mimalloc PUBLIC ${mi_libraries}) + target_link_libraries(mimalloc PRIVATE ${mi_libraries}) target_include_directories(mimalloc PUBLIC $ $ @@ -370,7 +370,7 @@ if (MI_BUILD_STATIC) set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) - target_link_libraries(mimalloc-static PUBLIC ${mi_libraries}) + target_link_libraries(mimalloc-static PRIVATE ${mi_libraries}) target_include_directories(mimalloc-static PUBLIC $ $ From fe300ca14107e755e5973d3eba344cb16c49fa98 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 22 Nov 2022 10:58:02 -0800 Subject: [PATCH 35/88] fix extern inline definition (issue #643) --- src/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 38503dbb..64708d7b 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -121,7 +121,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si } // The main allocation function -inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { +extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { if mi_likely(size <= MI_SMALL_SIZE_MAX) { mi_assert_internal(huge_alignment == 0); return mi_heap_malloc_small_zero(heap, size, zero); From 34910664f1038c22df88ec37a822a47e872806f7 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 14:46:23 -0800 Subject: [PATCH 36/88] add mi_heap_new_ and mi_heap_new_n --- include/mimalloc.h | 3 +++ src/alloc.c | 47 ++++++++++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 1fc10b2a..7dc66f5f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -394,6 +394,9 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, s mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_new_(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); + #ifdef __cplusplus } #endif diff --git a/src/alloc.c b/src/alloc.c index 64708d7b..5e26f689 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -904,20 +904,46 @@ static bool mi_try_new_handler(bool nothrow) { } #endif -static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) { +static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* 
heap, size_t size, bool nothrow ) { void* p = NULL; while(p == NULL && mi_try_new_handler(nothrow)) { - p = mi_malloc(size); + p = mi_heap_malloc(heap,size); } return p; } -mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { - void* p = mi_malloc(size); - if mi_unlikely(p == NULL) return mi_try_new(size,false); +static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { + return mi_heap_try_new(mi_get_default_heap(), size, nothrow); +} + + +mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_new_(mi_heap_t* heap, size_t size) { + void* p = mi_heap_malloc(heap,size); + if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; } +mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { + return mi_heap_new_(mi_get_default_heap(), size); +} + + +mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_new_n(mi_heap_t* heap, size_t count, size_t size) { + size_t total; + if mi_unlikely(mi_count_size_overflow(count, size, &total)) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_heap_new_(heap,total); + } +} + +mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { + return mi_heap_new_n(mi_get_default_heap(), size, count); +} + + mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { void* p = mi_malloc(size); if mi_unlikely(p == NULL) return mi_try_new(size, true); @@ -942,17 +968,6 @@ mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, siz return p; } -mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { - size_t total; - if mi_unlikely(mi_count_size_overflow(count, size, &total)) { - mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc - return NULL; - } - else { - return mi_new(total); - } -} - mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) { void* q; do { From 6e2b077b3565d8f6bb67d87300245878be11ceba Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 14:48:26 -0800 Subject: [PATCH 37/88] rename to heap_alloc_new and heap_alloc_new_n --- include/mimalloc.h | 4 ++-- src/alloc.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7dc66f5f..57f31336 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -394,8 +394,8 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, s mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_new_(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); #ifdef __cplusplus } diff --git a/src/alloc.c b/src/alloc.c index 5e26f689..84a9fb43 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -917,30 +917,30 @@ static 
mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) {
 }

-mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_new_(mi_heap_t* heap, size_t size) {
+mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) {
   void* p = mi_heap_malloc(heap,size);
   if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false);
   return p;
 }

 mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) {
-  return mi_heap_new_(mi_get_default_heap(), size);
+  return mi_heap_alloc_new(mi_get_default_heap(), size);
 }

-mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_new_n(mi_heap_t* heap, size_t count, size_t size) {
+mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) {
   size_t total;
   if mi_unlikely(mi_count_size_overflow(count, size, &total)) {
     mi_try_new_handler(false);  // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc
     return NULL;
   }
   else {
-    return mi_heap_new_(heap,total);
+    return mi_heap_alloc_new(heap,total);
   }
 }

 mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) {
-  return mi_heap_new_n(mi_get_default_heap(), size, count);
+  return mi_heap_alloc_new_n(mi_get_default_heap(), size, count);
 }

From 9617f16df95095de18564c5a8d3c421c3557d09b Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Tue, 22 Nov 2022 16:58:32 -0800
Subject: [PATCH 38/88] add STL allocators that use a specific heap and can
 destroy at the end; see original PR #625 by @vmarkovtsev

---
 include/mimalloc.h     | 118 +++++++++++++++++++++++++++++++++++++++++
 src/alloc.c            |   5 +-
 test/main-override.cpp |  41 ++++++++++++++
 3 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index 57f31336..d43a4419 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -456,6 +456,124 @@ template<class T> struct mi_stl_allocator {
 template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; }
 template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; }

+
+#if (__cplusplus >= 201103L) || (_MSC_VER > 1900)  // C++11
+#include <memory>      // std::shared_ptr
+
+// STL allocator allocation in a specific heap
+template<class T> struct mi_heap_stl_allocator {
+  typedef T                 value_type;
+  typedef std::size_t       size_type;
+  typedef std::ptrdiff_t    difference_type;
+  typedef value_type&       reference;
+  typedef value_type const& const_reference;
+  typedef value_type*       pointer;
+  typedef value_type const* const_pointer;
+  template <class U> struct rebind { typedef mi_heap_stl_allocator<U> other; };
+
+  mi_heap_stl_allocator() {
+    mi_heap_t* hp = mi_heap_new();
+    this->heap.reset(hp, heap_delete);
+  }
+  mi_heap_stl_allocator(mi_heap_t* hp) : heap(hp) { }    /* will not delete or destroy the passed in heap */
+  mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : heap(x.heap) { }
+  template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : heap(x.heap) { }
+  mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; }
+  void deallocate(T* p, size_type) { mi_free(p); }
+
+  #if (__cplusplus >= 201703L)  // C++17
+  mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }
+  mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
+  #else
+  mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); }
+  #endif
+
+  #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900))  // C++11
+  using propagate_on_container_copy_assignment = std::true_type;
+  using propagate_on_container_move_assignment = std::true_type;
+  using propagate_on_container_swap            = std::true_type;
+  using is_always_equal                        = std::false_type;
+  template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); }
+  template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); }
+  #else
+  void construct(pointer p, value_type const& val) { ::new(p) value_type(val); }
+  void destroy(pointer p) { p->~value_type(); }
+  #endif
+
+  size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX / sizeof(value_type)); }
+  pointer address(reference x) const { return &x; }
+  const_pointer address(const_reference x) const { return &x; }
+
+  void collect(bool force) { mi_heap_collect(this->heap.get(), force); }
+
+protected:
+  std::shared_ptr<mi_heap_t> heap;
+
+private:
+  static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } }
+};
+
+template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.heap == y.heap); }
+template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.heap != y.heap); }
+
+
+// STL allocator allocation in a specific heap, where `free` does nothing and
+// the heap is destroyed in one go on destruction -- use with care!
+template<class T> struct mi_heap_destroy_stl_allocator {
+  typedef T                 value_type;
+  typedef std::size_t       size_type;
+  typedef std::ptrdiff_t    difference_type;
+  typedef value_type&       reference;
+  typedef value_type const& const_reference;
+  typedef value_type*       pointer;
+  typedef value_type const* const_pointer;
+  template <class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; };
+
+  mi_heap_destroy_stl_allocator() {
+    mi_heap_t* hp = mi_heap_new();
+    this->heap.reset(hp, heap_destroy);
+  }
+  mi_heap_destroy_stl_allocator(mi_heap_t* hp) : heap(hp) { }    /* will not delete or destroy the passed-in heap; nor free any allocated objects it allocates in the heap! */
+  mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : heap(x.heap) { }
+  template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : heap(x.heap) { }
+  mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; }
+  void deallocate(T* p, size_type) { /* do nothing as we destroy the heap on destruct. */ }
+
+  #if (__cplusplus >= 201703L)  // C++17
+  mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }
+  mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
+  #else
+  mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); }
+  #endif
+
+  #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900))  // C++11
+  using propagate_on_container_copy_assignment = std::true_type;
+  using propagate_on_container_move_assignment = std::true_type;
+  using propagate_on_container_swap            = std::true_type;
+  using is_always_equal                        = std::false_type;
+  template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); }
+  template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); }
+  #else
+  void construct(pointer p, value_type const& val) { ::new(p) value_type(val); }
+  void destroy(pointer p) { p->~value_type(); }
+  #endif
+
+  size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX / sizeof(value_type)); }
+  pointer address(reference x) const { return &x; }
+  const_pointer address(const_reference x) const { return &x; }
+
+// protected:
+  std::shared_ptr<mi_heap_t> heap;
+
+private:
+  static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } }
+};
+
+template<class T1, class T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.heap == y.heap); }
+template<class T1, class T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.heap != y.heap); }
+
+#endif // C++11
+
 #endif // __cplusplus

 #endif
diff --git a/src/alloc.c b/src/alloc.c
index 84a9fb43..21ad3bb3 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -91,7 +91,10 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz

 static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
   mi_assert(heap != NULL);
-  mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
+  #if MI_DEBUG
+  const uintptr_t tid = _mi_thread_id();
+  mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local
+  #endif
   mi_assert(size <= MI_SMALL_SIZE_MAX);
   #if (MI_PADDING)
   if (size == 0) {
diff --git a/test/main-override.cpp b/test/main-override.cpp
index f748c75a..f5cb3668 100644
--- a/test/main-override.cpp
+++ b/test/main-override.cpp
@@ -36,6 +36,8 @@ static void fail_aslr();     // issue #372
 static void tsan_numa_test();   // issue #414
 static void strdup_test();      // issue #445

+static void test_stl_allocators();
+
 int main() {
   mi_stats_reset();  // ignore earlier allocations
   heap_thread_free_large();
@@ -46,6 +48,8 @@ int main() {
   tsan_numa_test();
   strdup_test();

+  test_stl_allocators();
+
   test_mt_shutdown();
   //fail_aslr();
   mi_stats_print(NULL);
@@ -122,6 +126,43 @@ static bool test_stl_allocator2() {
   return vec.size() == 0;
 }

+static bool test_stl_allocator3() {
+  std::vector<int, mi_heap_stl_allocator<int> > vec;
+  vec.push_back(1);
+  vec.pop_back();
+  return vec.size() == 0;
+}
+
+static bool test_stl_allocator4() {
+  std::vector<some_struct, mi_heap_stl_allocator<some_struct> > vec;
+  vec.push_back(some_struct());
+  vec.pop_back();
+  return vec.size() == 0;
+}
+
+static bool test_stl_allocator5() {
+  std::vector<int, mi_heap_destroy_stl_allocator<int> > vec;
+  vec.push_back(1);
+  vec.pop_back();
+  return vec.size() == 0;
+}
+
+static bool test_stl_allocator6() {
+  std::vector<some_struct, mi_heap_destroy_stl_allocator<some_struct> > vec;
+  vec.push_back(some_struct());
+  vec.pop_back();
+  return vec.size() == 0;
+}
+
+static void test_stl_allocators() {
+  test_stl_allocator1();
+  test_stl_allocator2();
+  test_stl_allocator3();
+  test_stl_allocator4();
+  test_stl_allocator5();
+  test_stl_allocator6();
+}
+
 // issue 445
 static void strdup_test() {
 #ifdef _MSC_VER

From 061bbe25b0837f6a1cd073756c55d888571786df Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Tue, 22 Nov 2022 17:17:14 -0800
Subject: [PATCH 39/88] update readme

---
 readme.md | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/readme.md b/readme.md
index fe2ead69..64cd0c4b 100644
--- a/readme.md
+++ b/readme.md
@@ -27,6 +27,8 @@ It also has an easy way to override the default allocator in [Windows](#override
   to integrate and adapt in other projects. For runtime systems it
   provides hooks for a monotonic _heartbeat_ and deferred freeing (for
   bounded worst-case times with reference counting).
+  Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS,
+  Linux, WASM, various BSDs, Haiku, MUSL, etc) and has excellent support for dynamic overriding.
 - __free list sharding__: instead of one big free list (per size class) we have
   many smaller lists per "mimalloc page" which reduces fragmentation and
   increases locality --
@@ -42,7 +44,7 @@ It also has an easy way to override the default allocator in [Windows](#override
   similar to randomized algorithms like skip lists where adding
   a random oracle removes the need for a more complex algorithm.
 - __eager page reset__: when a "page" becomes empty (with increased chance
-  due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged")
+  due to free list sharding) the memory is marked to the OS as unused (reset or decommitted)
   reducing (real) memory pressure and fragmentation, especially in long running
   programs.
 - __secure__: _mimalloc_ can be built in secure mode, adding guard pages,
@@ -52,13 +54,12 @@ It also has an easy way to override the default allocator in [Windows](#override
 - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions.
   A heap can be destroyed at once instead of deallocating each object separately.
 - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation
-  times (_wcat_), bounded space overhead (~0.2% meta-data, with low internal fragmentation),
-  and has no internal points of contention using only atomic operations.
+  times (_wcat_) (up to OS primitives), bounded space overhead (~0.2% meta-data, with low
+  internal fragmentation), and has no internal points of contention using only atomic operations.
 - __fast__: In our benchmarks (see [below](#performance)),
   _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
-  and often uses less memory. A nice property
-  is that it does consistently well over a wide range of benchmarks. There is also good huge OS page
-  support for larger server programs.
+  and often uses less memory. A nice property is that it does consistently well over a wide range
+  of benchmarks. There is also good huge OS page support for larger server programs.

 The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API.
 You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results.
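A short usage sketch for the heap-backed STL allocators introduced in PATCH 38 above
(assuming C++11 or later and this mimalloc build; the container choices are illustrative):

    #include <mimalloc.h>
    #include <vector>

    int main() {
      // a default-constructed allocator creates its own heap (mi_heap_new) and
      // shares it among copies through an internal std::shared_ptr
      std::vector<int, mi_heap_stl_allocator<int> > v;
      v.push_back(1);   // the vector's storage comes from the allocator's private heap
      // when `v` grows or is destroyed, deallocate() returns blocks via mi_free

      // the destroy-variant's deallocate() is a no-op; all memory is released
      // at once (mi_heap_destroy) when the last allocator copy is destructed
      std::vector<int, mi_heap_destroy_stl_allocator<int> > w;
      w.push_back(2);
      return 0;   // w's heap (and everything in it) is freed in one go here
    }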
From 7dce31f74385f713749b4a902cc16d31f211c01b Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Tue, 22 Nov 2022 18:13:27 -0800
Subject: [PATCH 40/88] reenable decommitting of a huge aligned prefix

---
 src/segment.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/segment.c b/src/segment.c
index b922a50c..b67fce87 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -1261,7 +1261,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment,
   mi_segments_track_size(-(long)segment->segment_size, tld);
   mi_page_t* page = mi_segment_find_free(segment, tld);
   mi_assert_internal(page != NULL);
-#if MI_DEBUG > 3
+
   if (page_alignment > 0) {
     size_t psize;
     size_t pre_size;
@@ -1276,7 +1276,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment,
       _mi_mem_decommit(decommit_start, decommit_size, os_tld);
     }
   }
-#endif
+
   // for huge pages we initialize the xblock_size as we may
   // overallocate to accommodate large alignments.
   size_t psize;

From 78690fbec20d2bea57a82259a4ec20b6467a4777 Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Tue, 22 Nov 2022 18:41:51 -0800
Subject: [PATCH 41/88] fix protected status in stl allocator (pr #625)

---
 include/mimalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index d43a4419..86856900 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -506,7 +506,7 @@ template<class T> struct mi_heap_stl_allocator {

   void collect(bool force) { mi_heap_collect(this->heap.get(), force); }

-protected:
+// protected:
   std::shared_ptr<mi_heap_t> heap;

 private:
   static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } }

From 00a42bf379a166ccc64ee21fd0b751420ae271ef Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Tue, 22 Nov 2022 18:42:05 -0800
Subject: [PATCH 42/88] refactor mi_segment_init

---
 src/segment.c | 158 +++++++++++++++++++------------------------------
 1 file changed, 59 insertions(+), 99 deletions(-)

diff --git a/src/segment.c b/src/segment.c
index b67fce87..e22417c9 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -496,11 +496,50 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
   Segment allocation
----------------------------------------------------------- */

-// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE`.
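// (in the refactor below the OS allocation is split out into `mi_segment_os_alloc`;
//  for a large `page_alignment` it enlarges the allocation and records an
//  `align_offset` -- a multiple of MI_SEGMENT_SIZE -- at which the huge page
//  must start so that it gets the requested alignment)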
-static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, size_t pre_size, size_t info_size, + size_t* segment_size, bool* is_zero, bool* commit, mi_segments_tld_t* tld, mi_os_tld_t* tld_os) { - // the segment parameter is non-null if it came from our cache - mi_assert_internal(segment==NULL || (required==0 && page_kind <= MI_PAGE_LARGE)); + size_t memid; + bool mem_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy + bool is_pinned = false; + size_t align_offset = 0; + size_t alignment = MI_SEGMENT_SIZE; + if (page_alignment > 0) { + alignment = page_alignment; + align_offset = _mi_align_up(pre_size, MI_SEGMENT_SIZE); + *segment_size = *segment_size + (align_offset - pre_size); + } + + mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); + if (segment == NULL) return NULL; // failed to allocate + if (!commit) { + // ensure the initial info is committed + mi_assert_internal(!mem_large && !is_pinned); + bool commit_zero = false; + bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld_os); + if (commit_zero) *is_zero = true; + if (!ok) { + // commit failed; we cannot touch the memory: free the segment directly and return `NULL` + _mi_mem_free(segment, *segment_size, alignment, align_offset, memid, false, false, tld_os); + return NULL; + } + } + + mi_track_mem_undefined(segment, info_size); + segment->memid = memid; + segment->mem_is_pinned = (mem_large || is_pinned); + segment->mem_is_committed = commit; + segment->mem_alignment = alignment; + segment->mem_align_offset = align_offset; + mi_segments_track_size((long)(*segment_size), tld); + return segment; +} + +// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + // required is only > 0 for huge page allocations + mi_assert_internal((required > 0 && page_kind > MI_PAGE_LARGE)|| (required==0 && page_kind <= MI_PAGE_LARGE)); // calculate needed sizes first size_t capacity; @@ -527,105 +566,29 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); - bool pages_still_good = false; bool is_zero = false; - - // Try to get it from our thread local cache first - if (segment != NULL) { - // came from cache - mi_track_mem_defined(segment,info_size); - mi_assert_internal(segment->segment_size == segment_size); - if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { - pages_still_good = true; - } - else - { - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_pinned); - mi_segment_protect(segment, false, tld->os); // reset protection if the page kind differs - } - // different page kinds; unreset any reset pages, and unprotect - // TODO: optimize cache pop to return fitting pages if possible? 
- for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { - page->is_reset = false; - } - else { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) - } - } - } - // ensure the initial info is committed - if (segment->capacity < capacity) { - bool commit_zero = false; - bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); - if (commit_zero) is_zero = true; - if (!ok) { - return NULL; - } - } - } - } - else { - // Allocate the segment from the OS - size_t memid; - bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - bool is_pinned = false; - size_t align_offset = 0; - size_t alignment = MI_SEGMENT_SIZE; - if (page_alignment > 0) { - alignment = page_alignment; - align_offset = _mi_align_up( pre_size, MI_SEGMENT_SIZE ); - segment_size += (align_offset - pre_size); - } - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate - if (!commit) { - // ensure the initial info is committed - mi_assert_internal(!mem_large && !is_pinned); - bool commit_zero = false; - bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); - if (commit_zero) is_zero = true; - if (!ok) { - // commit failed; we cannot touch the memory: free the segment directly and return `NULL` - _mi_mem_free(segment, segment_size, alignment, align_offset, memid, false, false, os_tld); - return NULL; - } - } - mi_track_mem_undefined(segment,info_size); - segment->memid = memid; - segment->mem_is_pinned = (mem_large || is_pinned); - segment->mem_is_committed = commit; - segment->mem_alignment = alignment; - segment->mem_align_offset = align_offset; - mi_segments_track_size((long)segment_size, tld); - } + + // Allocate the segment from the OS (segment_size can change due to alignment) + mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, pre_size, info_size, &segment_size, &is_zero, &commit, tld, os_tld); + if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment->mem_is_pinned ? 
segment->mem_is_committed : true); mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - if (!pages_still_good) { - // zero the segment info (but not the `mem` fields) - ptrdiff_t ofs = offsetof(mi_segment_t, next); - memset((uint8_t*)segment + ofs, 0, info_size - ofs); + + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); - // initialize pages info - for (size_t i = 0; i < capacity; i++) { - mi_assert_internal(i <= 255); - segment->pages[i].segment_idx = (uint8_t)i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; - segment->pages[i].is_zero_init = is_zero; - } + // initialize pages info + for (size_t i = 0; i < capacity; i++) { + mi_assert_internal(i <= 255); + segment->pages[i].segment_idx = (uint8_t)i; + segment->pages[i].is_reset = false; + segment->pages[i].is_committed = commit; + segment->pages[i].is_zero_init = is_zero; } - else { - // zero the segment info but not the pages info (and mem fields) - ptrdiff_t ofs = offsetof(mi_segment_t, next); - memset((uint8_t*)segment + ofs, 0, offsetof(mi_segment_t,pages) - ofs); - } - + // initialize segment->page_kind = page_kind; segment->capacity = capacity; @@ -648,9 +611,6 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ return segment; } -static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_init(NULL, required, page_kind, page_shift, page_alignment, tld, os_tld); -} static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { MI_UNUSED(force); From fed883c81f1a462e0d470f30a832824662ccf562 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 18:44:27 -0800 Subject: [PATCH 43/88] refactor mi_segment_init fix --- src/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index e22417c9..fe53b8ce 100644 --- a/src/segment.c +++ b/src/segment.c @@ -512,7 +512,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme mi_segment_t* segment = (mi_segment_t*)_mi_mem_alloc_aligned(*segment_size, alignment, align_offset, commit, &mem_large, &is_pinned, is_zero, &memid, tld_os); if (segment == NULL) return NULL; // failed to allocate - if (!commit) { + if (!(*commit)) { // ensure the initial info is committed mi_assert_internal(!mem_large && !is_pinned); bool commit_zero = false; @@ -525,7 +525,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme } } - mi_track_mem_undefined(segment, info_size); + mi_track_mem_undefined(segment, info_size); MI_UNUSED(info_size); segment->memid = memid; segment->mem_is_pinned = (mem_large || is_pinned); segment->mem_is_committed = commit; From 969d8bc5feade7c327b08aadb4c6944e5d341113 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 20:56:35 -0800 Subject: [PATCH 44/88] make huge pages part of the regular page queues again --- include/mimalloc-internal.h | 6 ++++++ include/mimalloc-types.h | 7 +++++++ src/alloc-aligned.c | 4 +++- src/alloc.c | 30 +++++++++++++++++++++++++++--- src/os.c | 18 ++++-------------- src/page-queue.c | 2 ++ src/page.c | 30 ++++++++++++++++++++++++------ src/region.c | 15 +++++++++++++-- src/segment.c | 21 ++++++++++++++++++--- test/main-override-static.c | 2 +- 10 files changed, 105 
insertions(+), 30 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 0ecc3057..cb4c79af 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -82,6 +82,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
 size_t _mi_os_good_alloc_size(size_t size);
 bool  _mi_os_has_overcommit(void);
+bool  _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);

 void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
 void  _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);

@@ -105,7 +106,12 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page
+
+#if MI_HUGE_PAGE_ABANDON
 void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
+#else
+void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
+#endif
 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
 void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 5c64be4e..d1cceb9b 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -71,6 +71,13 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif

+
+// We used to abandon huge pages and eagerly deallocate them when freed from another thread,
+// but that makes it impossible to visit them during a heap walk or to include them in a
+// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from
+// another thread so most memory is available until it gets properly freed by the owning thread.
+// #define MI_HUGE_PAGE_ABANDON 1
+
+
 // ------------------------------------------------------
 // Platform specific values
 // ------------------------------------------------------

diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index ffc51edc..b72600ea 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -59,7 +59,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
   uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask);
   mi_assert_internal(adjust <= alignment);
   void* aligned_p = (adjust == alignment ?
p : (void*)((uintptr_t)p + adjust)); - if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } + if (aligned_p != p) { + mi_page_set_has_aligned(_mi_ptr_page(p), true); + } mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); diff --git a/src/alloc.c b/src/alloc.c index 21ad3bb3..6b63eb0c 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -334,6 +334,17 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); #endif } +#if !MI_HUGE_PAGE_ABANDON + else { + const size_t bpsize = mi_page_block_size(page); + if (bpsize <= MI_HUGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, huge, bpsize); + } + else { + mi_heap_stat_decrease(heap, giant, bpsize); + } + } +#endif } #else static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { @@ -341,6 +352,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { } #endif +#if MI_HUGE_PAGE_ABANDON #if (MI_STAT>0) // maintain stats for huge objects static void mi_stat_huge_free(const mi_page_t* page) { @@ -358,12 +370,13 @@ static void mi_stat_huge_free(const mi_page_t* page) { MI_UNUSED(page); } #endif +#endif // ------------------------------------------------------ // Free // ------------------------------------------------------ -// multi-threaded free (or free in huge block) +// multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { // The padding check may access the non-thread-owned page for the key values. @@ -371,13 +384,21 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_check_padding(page, block); mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - // huge page segments are always abandoned and can be freed immediately mi_segment_t* const segment = _mi_page_segment(page); - if (segment->page_kind==MI_PAGE_HUGE) { + if (segment->page_kind == MI_PAGE_HUGE) { + #if MI_HUGE_PAGE_ABANDON + // huge page segments are always abandoned and can be freed immediately mi_stat_huge_free(page); _mi_segment_huge_page_free(segment, page, block); return; + #else + // huge pages are special as they occupy the entire segment + // as these are large we reset the memory occupied by the page so it is available to other threads + // (as the owning thread needs to actually free the memory later). + _mi_segment_huge_page_reset(segment, page, block); + #endif } + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading memset(block, MI_DEBUG_FREED, mi_usable_size(block)); @@ -453,6 +474,9 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // Adjust a block that was allocated aligned, to the actual start of the block in the page. 
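// e.g. with a block size of 64 bytes and `p` pointing at `page_start + 100`:
//   diff = 100, adjust = 100 % 64 = 36, so the returned block is p - 36 = page_start + 64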
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); + #if !MI_HUGE_PAGE_ABANDON + // if (segment->mem_align_offset != 0) return (mi_block_t*)p; // don't unalign blocks that have huge alignment + #endif const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); const size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); diff --git a/src/os.c b/src/os.c index c2d53e5b..6631f65f 100644 --- a/src/os.c +++ b/src/os.c @@ -1007,7 +1007,7 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } -static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); } @@ -1072,24 +1072,14 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(addr, size, stats); - } - else { - return mi_os_resetx(addr, size, true, stats); - } + return mi_os_resetx(addr, size, true, stats); } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) - } - else { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); - } + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); } diff --git a/src/page-queue.c b/src/page-queue.c index 26b10800..61cd1310 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -228,7 +228,9 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + #endif mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); diff --git a/src/page.c b/src/page.c index 7c3a30a8..5a147b51 100644 --- a/src/page.c +++ b/src/page.c @@ -111,7 +111,10 @@ bool _mi_page_is_valid(mi_page_t* page) { if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); - if (segment->page_kind != MI_PAGE_HUGE) { + #if MI_HUGE_PAGE_ABANDON + if (segment->page_kind != MI_PAGE_HUGE) + #endif + { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); @@ -243,7 +246,9 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(mi_page_heap(page) == heap); 
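  // (without MI_HUGE_PAGE_ABANDON, huge pages keep their owning heap and sit in
  //  the regular huge page queue, so heap walks and `mi_heap_destroy` can reach them)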
mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + #endif mi_assert_internal(!page->is_reset); // TODO: push on full queue immediately if it is full? mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); @@ -253,22 +258,27 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { // allocate a fresh page from a segment static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { - mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); - mi_assert_internal(pq==NULL||block_size == pq->block_size); + #if !MI_HUGE_PAGE_ABANDON + mi_assert_internal(pq != NULL); + mi_assert_internal(mi_heap_contains_queue(heap, pq)); + mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size); + #endif mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; } + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + #endif mi_assert_internal(pq!=NULL || page->xblock_size != 0); mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); // a fresh page was found, initialize it - const size_t full_block_size = (pq == NULL ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc + const size_t full_block_size = ((pq == NULL || mi_page_queue_is_huge(pq)) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc mi_assert_internal(full_block_size >= block_size); mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); - if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL + if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); return page; } @@ -799,15 +809,23 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); - mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size,page_alignment); + #if MI_HUGE_PAGE_ABANDON + mi_page_queue_t* pq = NULL; + #else + mi_page_queue_t* pq = mi_page_queue(heap, MI_HUGE_OBJ_SIZE_MAX); // not block_size as that can be low if the page_alignment > 0 + mi_assert_internal(mi_page_queue_is_huge(pq)); + #endif + mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size,page_alignment); if (page != NULL) { const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already mi_assert_internal(bsize >= size); mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue mi_page_set_heap(page, NULL); + #endif if (bsize > MI_HUGE_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, giant, bsize); diff --git a/src/region.c b/src/region.c index f069502f..7bcc951a 100644 --- a/src/region.c +++ b/src/region.c @@ -47,6 +47,7 @@ 
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); // arena.c mi_arena_id_t _mi_arena_id_none(void); @@ -481,11 +482,21 @@ void _mi_mem_collect(mi_os_tld_t* tld) { -----------------------------------------------------------------------------*/ bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_reset(p, size, tld->stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_decommit(p, size, tld->stats); + } + else { + return _mi_os_reset(p, size, tld->stats); + } } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_unreset(p, size, is_zero, tld->stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_commit(p, size, is_zero, tld->stats); + } + else { + return _mi_os_unreset(p, size, is_zero, tld->stats); + } } bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { diff --git a/src/segment.c b/src/segment.c index fe53b8ce..be496f21 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1217,8 +1217,10 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); + #if MI_HUGE_PAGE_ABANDON segment->thread_id = 0; // huge pages are immediately abandoned mi_segments_track_size(-(long)segment->segment_size, tld); + #endif mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); @@ -1230,10 +1232,10 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); mi_assert_internal(psize - (aligned_p - p) >= size); if (!segment->mem_is_pinned && page->is_committed) { - // decommit the part of the page that is unused; this can be quite large (close to MI_SEGMENT_SIZE) + // decommit the part of the page that is unused; this can be quite large (close to MI_SEGMENT_SIZE) uint8_t* decommit_start = p + sizeof(mi_block_t); // for the free list ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_mem_decommit(decommit_start, decommit_size, os_tld); + _mi_os_reset(decommit_start, decommit_size, os_tld->stats); } } @@ -1245,6 +1247,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, return page; } +#if MI_HUGE_PAGE_ABANDON // free huge block from another thread void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { // huge page segments are always abandoned and can be freed immediately by any thread @@ -1273,6 +1276,18 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block #endif } +#else +// reset memory of a huge block from another thread +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + mi_assert_internal(segment->page_kind == MI_PAGE_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(page->used == 1); // this is called just before the free + 
mi_assert_internal(page->free == NULL); + const size_t bsize = mi_page_block_size(page); + _mi_os_reset(block + 1, bsize - sizeof(mi_block_t), &_mi_stats_main); +} +#endif + /* ----------------------------------------------------------- Page allocation ----------------------------------------------------------- */ @@ -1292,7 +1307,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { page = mi_segment_medium_page_alloc(heap, block_size, tld, os_tld); } - else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { + else if (block_size <= MI_LARGE_OBJ_SIZE_MAX /* || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t)) */ ) { page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { diff --git a/test/main-override-static.c b/test/main-override-static.c index d216a330..dbd0fab8 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -205,7 +205,7 @@ static bool test_visit(const mi_heap_t* heap, const mi_heap_area_t* area, void* static void test_heap_walk(void) { mi_heap_t* heap = mi_heap_new(); - //mi_heap_malloc(heap, 2097152); + mi_heap_malloc(heap, 16*2097152); mi_heap_malloc(heap, 2067152); mi_heap_malloc(heap, 2097160); mi_heap_malloc(heap, 24576); From db0a69b4829b0de1c275c4533de43fdb7b485285 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 20:57:31 -0800 Subject: [PATCH 45/88] remove comment --- src/alloc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 6b63eb0c..7095dc67 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -474,9 +474,6 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // Adjust a block that was allocated aligned, to the actual start of the block in the page. mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); - #if !MI_HUGE_PAGE_ABANDON - // if (segment->mem_align_offset != 0) return (mi_block_t*)p; // don't unalign blocks that have huge alignment - #endif const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); const size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); @@ -484,7 +481,6 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { - //mi_page_t* const page = _mi_segment_page_of(segment, p); mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); mi_stat_free(page, block); // stat_free may access the padding mi_track_free(p); From e7dac7c4059d4ea854c766889122c16d6fe1d262 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 21:02:05 -0800 Subject: [PATCH 46/88] prevent retiring pages in the huge queue (which may have a small size but huge alignment) --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 5a147b51..8a24c13f 100644 --- a/src/page.c +++ b/src/page.c @@ -436,7 +436,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. 
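// Aside (sketch, assumptions noted): the change below replaces the
// `!mi_page_is_in_full(page)` test with a check on the queue kind, since a
// page in the huge queue can have a small xblock_size (the request was small)
// while still occupying a segment with a huge alignment -- retiring it like an
// ordinary small page would be wrong. The predicate is assumed to look
// roughly like:
//
//   static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
//     return (pq->block_size > MI_LARGE_OBJ_SIZE_MAX);  // full or huge queue
//   }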
mi_page_queue_t* pq = mi_page_queue_of(page); - if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page)) { + if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) { // not too large && not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); From 20880807ce275b5ed23eb124d8a4b157eb042dd0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 22:05:18 -0800 Subject: [PATCH 47/88] remove comment --- src/options.c | 2 +- src/segment.c | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/options.c b/src/options.c index 6c6f8f2f..6980a047 100644 --- a/src/options.c +++ b/src/options.c @@ -94,7 +94,7 @@ static mi_option_desc_t options[_mi_option_last] = { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 2, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees! }; diff --git a/src/segment.c b/src/segment.c index 803bb47b..55ec4615 100644 --- a/src/segment.c +++ b/src/segment.c @@ -316,7 +316,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0); + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 3*MI_MAX_ALIGN_GUARANTEE : 0); if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } @@ -463,15 +463,6 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes. 
- /* - if (commit && size > 0) { - const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE); - if (p + csize <= mi_segment_end(segment)) { - size = csize; - } - } - */ // commit liberal, but decommit conservative uint8_t* start = NULL; size_t full_size = 0; From 9e56567d23126d0b608487fa4f11d5b59845e50f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 23 Nov 2022 09:50:29 -0800 Subject: [PATCH 48/88] fix decommit for huge objects --- src/alloc.c | 9 ++++++--- src/segment.c | 37 ++++++++++++++++--------------------- test/main-override.cpp | 18 ++++++++++++++++-- test/test-stress.c | 2 +- 4 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index f602fdcf..ac117f17 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -395,9 +395,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc #endif } - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + } #endif // Try to put the block on either the page-local thread free list, or the heap delayed free list. @@ -449,7 +450,9 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + if (!mi_page_is_huge(page)) { // huge page content may be already decommitted + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + } #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; diff --git a/src/segment.c b/src/segment.c index 55ec4615..b054b975 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1522,25 +1522,23 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, #if MI_HUGE_PAGE_ABANDON segment->thread_id = 0; // huge segments are immediately abandoned #endif - - if (page_alignment > 0) { - size_t psize; - uint8_t* p = _mi_segment_page_start(segment, page, &psize); - uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)p, page_alignment); - mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); - mi_assert_internal(psize - (aligned_p - p) >= size); - if (!segment->allow_decommit) { - // decommit the part of the page that is unused; this can be quite large (close to MI_SEGMENT_SIZE) - uint8_t* decommit_start = p + sizeof(mi_block_t); // for the free list - ptrdiff_t decommit_size = aligned_p - decommit_start; - mi_segment_decommit(segment, decommit_start, decommit_size, &_mi_stats_main); - } - } + // for huge pages we initialize the xblock_size as we may // overallocate to accommodate large alignments. size_t psize; - _mi_segment_page_start(segment, page, &psize); + uint8_t* start = _mi_segment_page_start(segment, page, &psize); page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? 
MI_HUGE_BLOCK_SIZE : (uint32_t)psize); + + // decommit the part of the prefix of a page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) + if (page_alignment > 0 && segment->allow_decommit) { + uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); + mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(psize - (aligned_p - start) >= size); + uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list + ptrdiff_t decommit_size = aligned_p - decommit_start; + _mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + } + return page; } @@ -1579,13 +1577,10 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); - const size_t csize = mi_page_block_size(page) - sizeof(mi_block_t); - uint8_t* p = ( uint8_t*)block + sizeof(mi_block_t); if (segment->allow_decommit) { - mi_segment_decommit(segment, p, csize, &_mi_stats_main); - } - else { - _mi_os_reset(p, csize, &_mi_stats_main); + const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } } #endif diff --git a/test/main-override.cpp b/test/main-override.cpp index b205dc85..e12567d9 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -37,12 +37,14 @@ static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 static void strdup_test(); // issue #445 static void bench_alloc_large(void); // issue #xxx +static void heap_thread_free_huge(); static void test_stl_allocators(); int main() { mi_stats_reset(); // ignore earlier allocations - + heap_thread_free_huge(); + /* heap_thread_free_large(); heap_no_delete(); heap_late_free(); @@ -51,7 +53,7 @@ int main() { large_alloc(); tsan_numa_test(); strdup_test(); - + */ test_stl_allocators(); test_mt_shutdown(); @@ -240,6 +242,18 @@ static void heap_thread_free_large() { } } +static void heap_thread_free_huge_worker() { + mi_free(shared_p); +} + +static void heap_thread_free_huge() { + for (int i = 0; i < 100; i++) { + shared_p = mi_malloc(1024 * 1024 * 1024); + auto t1 = std::thread(heap_thread_free_large_worker); + t1.join(); + } +} + static void test_mt_shutdown() diff --git a/test/test-stress.c b/test/test-stress.c index 61171d03..b766a5ca 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -91,7 +91,7 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant + if (chance(1, r) && allow_large_objects) items *= 50000; // 0.01% giant else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge else items *= 100; // 1% large objects; } From a7bd9c08c8ad7862d138c97f795a7b607b1f4c21 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 23 Nov 2022 09:58:45 -0800 Subject: [PATCH 49/88] fix decommit of huge pages --- src/segment.c | 35 +++++++++++++++++------------------ test/main-override.cpp | 22 ++++++++++++++++++---- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/segment.c b/src/segment.c index be496f21..85cac395 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1224,26 +1224,22 @@ 
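// Worked example for the prefix decommit above (illustrative numbers, not
// from the patch): say the page payload starts at start = 0x10000040 and
// page_alignment is 1 MiB (0x100000). Then:
//
//   aligned_p      = _mi_align_up((uintptr_t)start, 0x100000);  // -> 0x10100000
//   decommit_start = start + sizeof(mi_block_t);                // keep the free-list block
//   decommit_size  = aligned_p - decommit_start;                // just under 1 MiB
//
// so nearly the whole unused prefix is returned to the OS while the first
// block header stays committed for the free list.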
static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); - if (page_alignment > 0) { - size_t psize; - size_t pre_size; - uint8_t* p = (uint8_t*)_mi_segment_page_start(segment, page, 0, &psize, &pre_size); - uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)p, page_alignment); - mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); - mi_assert_internal(psize - (aligned_p - p) >= size); - if (!segment->mem_is_pinned && page->is_committed) { - // decommit the part of the page that is unused; this can be quite large (close to MI_SEGMENT_SIZE) - uint8_t* decommit_start = p + sizeof(mi_block_t); // for the free list - ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_os_reset(decommit_start, decommit_size, os_tld->stats); - } - } - // for huge pages we initialize the xblock_size as we may // overallocate to accommodate large alignments. size_t psize; - _mi_segment_page_start(segment, page, 0, &psize, NULL); + uint8_t* start = _mi_segment_page_start(segment, page, 0, &psize, NULL); page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : (uint32_t)psize); + + // reset the part of the page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) + if (page_alignment > 0 && !segment->mem_is_pinned && page->is_committed) { + uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); + mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(psize - (aligned_p - start) >= size); + uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list + ptrdiff_t decommit_size = aligned_p - decommit_start; + _mi_os_reset(decommit_start, decommit_size, os_tld->stats); // do not decommit as it may be in a region + } + return page; } @@ -1283,8 +1279,11 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); - const size_t bsize = mi_page_block_size(page); - _mi_os_reset(block + 1, bsize - sizeof(mi_block_t), &_mi_stats_main); + if (!segment->mem_is_pinned && page->is_committed) { + const size_t usize = mi_usable_size(block) - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_reset(p, usize, &_mi_stats_main); + } } #endif diff --git a/test/main-override.cpp b/test/main-override.cpp index f5cb3668..81f57298 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -34,23 +34,26 @@ static void various_tests(); static void test_mt_shutdown(); static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 -static void strdup_test(); // issue #445 +static void strdup_test(); // issue #445 +static void heap_thread_free_huge(); static void test_stl_allocators(); int main() { mi_stats_reset(); // ignore earlier allocations + + heap_thread_free_huge(); + /* heap_thread_free_large(); heap_no_delete(); heap_late_free(); padding_shrink(); various_tests(); tsan_numa_test(); - strdup_test(); - + strdup_test(); test_stl_allocators(); - test_mt_shutdown(); + */ //fail_aslr(); mi_stats_print(NULL); return 0; @@ -235,6 +238,17 @@ static void heap_thread_free_large() { } } +static void heap_thread_free_huge_worker() { + mi_free(shared_p); +} + +static void heap_thread_free_huge() { + for (int i = 0; i < 10; i++) { + shared_p = mi_malloc(1024 * 1024 * 
1024); auto t1 = std::thread(heap_thread_free_large_worker); t1.join(); } } static void test_mt_shutdown() From 4b56af178ec681ccd458d821905548231b0ad094 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 23 Nov 2022 10:06:35 -0800 Subject: [PATCH 50/88] remove the 'not all freed' message from reserved and committed stats (as that is expected at normal process exit) --- src/stats.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/stats.c b/src/stats.c index 9a6bb096..1e2fe4d5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -170,19 +170,23 @@ static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* ar else mi_print_amount(n,0,out,arg); } -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) { +static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) { _mi_fprintf(out, arg,"%10s:", msg); - if (unit>0) { + if (unit > 0) { mi_print_amount(stat->peak, unit, out, arg); mi_print_amount(stat->allocated, unit, out, arg); mi_print_amount(stat->freed, unit, out, arg); mi_print_amount(stat->current, unit, out, arg); mi_print_amount(unit, 1, out, arg); mi_print_count(stat->allocated, unit, out, arg); - if (stat->allocated > stat->freed) - _mi_fprintf(out, arg, " not all freed!\n"); - else + if (stat->allocated > stat->freed) { + _mi_fprintf(out, arg, " "); + _mi_fprintf(out, arg, (notok == NULL ? "not all freed!" : notok)); + _mi_fprintf(out, arg, "\n"); + } + else { _mi_fprintf(out, arg, " ok\n"); + } } else if (unit<0) { mi_print_amount(stat->peak, -1, out, arg); @@ -210,6 +214,10 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t } } +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + mi_stat_print_ex(stat, msg, unit, out, arg, NULL); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); @@ -312,8 +320,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print(&stats->malloc, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif - mi_stat_print(&stats->reserved, "reserved", 1, out, arg); - mi_stat_print(&stats->committed, "committed", 1, out, arg); + mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); + mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); mi_stat_print(&stats->reset, "reset", 1, out, arg); mi_stat_print(&stats->page_committed, "touched", 1, out, arg); mi_stat_print(&stats->segments, "segments", -1, out, arg); From 58d12723d6817bb9e8141bc67651ea2a76900970 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 23 Nov 2022 10:34:19 -0800 Subject: [PATCH 51/88] make mi_collect(true) actually free the segment caches --- src/segment-cache.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 26786c92..d93fd644 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -133,14 +133,14 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo #define MI_MAX_PURGE_PER_PUSH (4) -static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) +static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force,
mi_os_tld_t* tld) { MI_UNUSED(tld); if (!mi_option_is_enabled(mi_option_allow_decommit)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; - const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); + const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); + size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; @@ -164,13 +164,19 @@ static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } - if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } } } void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - mi_segment_cache_purge(force, tld ); + if (force) { + // called on `mi_collect(true)` but not on thread termination + _mi_segment_cache_free_all(tld); + } + else { + mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); + } } void _mi_segment_cache_free_all(mi_os_tld_t* tld) { @@ -215,7 +221,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // purge expired entries - mi_segment_cache_purge(false /* force? */, tld); + mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); // find an available slot mi_bitmap_index_t bitidx; From 4cdfc188881c863ba3505defc28ca85839af6415 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Thu, 24 Nov 2022 15:13:19 +0100 Subject: [PATCH 52/88] Refactor C++ allocators to be DRY and protected --- include/mimalloc.h | 147 ++++++++++++++++++--------------------------- 1 file changed, 59 insertions(+), 88 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 86856900..8fd2eedd 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -414,7 +414,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_h #include // std::forward #endif -template struct mi_stl_allocator { +template struct _mi_stl_allocator_common { typedef T value_type; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; @@ -422,6 +422,25 @@ template struct mi_stl_allocator { typedef value_type const& const_reference; typedef value_type* pointer; typedef value_type const* const_pointer; + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } + template void destroy(U* p) mi_attr_noexcept { p->~U(); } + #else + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } + void destroy(pointer p) { p->~value_type(); } + #endif + + size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } + pointer address(reference x) const { return &x; } + const_pointer 
address(const_reference x) const { return &x; } +}; + +template struct mi_stl_allocator : public _mi_stl_allocator_common { + using typename _mi_stl_allocator_common::size_type; template struct rebind { typedef mi_stl_allocator other; }; mi_stl_allocator() mi_attr_noexcept = default; @@ -438,20 +457,8 @@ template struct mi_stl_allocator { #endif #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 - using propagate_on_container_copy_assignment = std::true_type; - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::true_type; - template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } - template void destroy(U* p) mi_attr_noexcept { p->~U(); } - #else - void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } - void destroy(pointer p) { p->~value_type(); } + using is_always_equal = std::true_type; #endif - - size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } }; template bool operator==(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return true; } @@ -460,26 +467,16 @@ template bool operator!=(const mi_stl_allocator& , const #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #include // std::shared_ptr -// STL allocator allocation in a specific heap -template struct mi_heap_stl_allocator { - typedef T value_type; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - typedef value_type& reference; - typedef value_type const& const_reference; - typedef value_type* pointer; - typedef value_type const* const_pointer; - template struct rebind { typedef mi_heap_stl_allocator other; }; +template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { + using typename _mi_stl_allocator_common::size_type; - mi_heap_stl_allocator() { + _mi_heap_stl_allocator_common() { mi_heap_t* hp = mi_heap_new(); this->heap.reset(hp, heap_delete); } - mi_heap_stl_allocator(mi_heap_t* hp) : heap(hp) { } /* will not delete or destroy the passed in heap */ - mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : heap(x.heap) { } - template mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : heap(x.heap) { } - mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } - void deallocate(T* p, size_type) { mi_free(p); } + _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete or destroy the passed in heap */ + _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } #if (__cplusplus >= 201703L) // C++17 mi_decl_nodiscard T* allocate(size_type count) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } @@ -489,84 +486,58 @@ template struct mi_heap_stl_allocator { #endif #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 - using propagate_on_container_copy_assignment = std::true_type; - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; using is_always_equal = std::false_type; - template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } - template void destroy(U* p) 
mi_attr_noexcept { p->~U(); } - #else - void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } - void destroy(pointer p) { p->~value_type(); } #endif - size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX / sizeof(value_type)); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - void collect(bool force) { mi_heap_collect(this->heap.get(), force); } -// protected: +protected: std::shared_ptr heap; -private: static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } }; +// STL allocator allocation in a specific heap +template struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common { + using typename _mi_heap_stl_allocator_common::size_type; + + template struct rebind { typedef mi_heap_stl_allocator other; }; + + using _mi_heap_stl_allocator_common::_mi_heap_stl_allocator_common; + mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + +protected: + template + friend struct mi_heap_stl_allocator; + template + friend bool operator==(const mi_heap_stl_allocator& first, const mi_heap_stl_allocator& second) mi_attr_noexcept; + template + friend bool operator!=(const mi_heap_stl_allocator& first, const mi_heap_stl_allocator& second) mi_attr_noexcept; +}; + template bool operator==(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (x.heap == y.heap); } template bool operator!=(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (x.heap != y.heap); } // STL allocator allocation in a specific heap, where `free` does nothing and // the heap is destroyed in one go on destruction -- use with care! -template struct mi_heap_destroy_stl_allocator { - typedef T value_type; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - typedef value_type& reference; - typedef value_type const& const_reference; - typedef value_type* pointer; - typedef value_type const* const_pointer; +template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { + using typename _mi_heap_stl_allocator_common::size_type; + template struct rebind { typedef mi_heap_destroy_stl_allocator other; }; - mi_heap_destroy_stl_allocator() { - mi_heap_t* hp = mi_heap_new(); - this->heap.reset(hp, heap_destroy); - } - mi_heap_destroy_stl_allocator(mi_heap_t* hp) : heap(hp) { } /* will not delete or destroy the passed-in heap; nor free any allocated objects it allocates in the heap! */ - mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : heap(x.heap) { } - template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : heap(x.heap) { } - mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } + using _mi_heap_stl_allocator_common::_mi_heap_stl_allocator_common; + mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { /* do nothing as we destroy the heap on destruct. 
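   A usage sketch (an illustration added here, not from this patch): a
   container that allocates from a private heap and releases everything at
   once, e.g.
       std::vector<int, mi_heap_destroy_stl_allocator<int>> v;
       v.push_back(42);   // allocates in the allocator's own heap
   individual deallocations are no-ops, and all memory is reclaimed together
   when the last copy of the allocator (and thus the shared heap) goes away;
   objects must not outlive the allocator's heap.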
*/ } - #if (__cplusplus >= 201703L) // C++17 - mi_decl_nodiscard T* allocate(size_type count) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } - mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } - #else - mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); } - #endif - - #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 - using propagate_on_container_copy_assignment = std::true_type; - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::false_type; - template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } - template void destroy(U* p) mi_attr_noexcept { p->~U(); } - #else - void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } - void destroy(pointer p) { p->~value_type(); } - #endif - - size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX / sizeof(value_type)); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - -// protected: - std::shared_ptr heap; - -private: - static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } } +protected: + template + friend struct mi_heap_destroy_stl_allocator; + template + friend bool operator==(const mi_heap_destroy_stl_allocator& first, const mi_heap_destroy_stl_allocator& second) mi_attr_noexcept; + template + friend bool operator!=(const mi_heap_destroy_stl_allocator& first, const mi_heap_destroy_stl_allocator& second) mi_attr_noexcept; }; template bool operator==(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (x.heap == y.heap); } From 78af17e3b301ebbe3b8de5f94a543a46b4fc6932 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 25 Nov 2022 14:03:00 -0800 Subject: [PATCH 53/88] add extra alignment test --- test/test-api.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/test-api.c b/test/test-api.c index 65578287..f202e7c1 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -191,6 +191,20 @@ int main(void) { } result = ok; }; + CHECK_BODY("malloc-aligned10") { + bool ok = true; + void* p[10+1]; + int align; + int j; + for(j = 0, align = 1; j <= 10 && ok; align *= 2, j++ ) { + p[j] = mi_malloc_aligned(43 + align, align); + ok = ((uintptr_t)p[j] % align) == 0; + } + for ( ; j > 0; j--) { + mi_free(p[j-1]); + } + result = ok; + } CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; From 604d4b259b5dafa57ae52433847e3f06185b01db Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 25 Nov 2022 14:27:48 -0800 Subject: [PATCH 54/88] fix assertion failure (issue #650) --- src/alloc-aligned.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index b72600ea..73d5524c 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -56,9 +56,10 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* } // .. and align within the allocation - uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); - mi_assert_internal(adjust <= alignment); - void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); + const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; + const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset); + mi_assert_internal(adjust < alignment); + void* aligned_p = (void*)((uintptr_t)p + adjust); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } From 81b261e657bf07d7c4411db46f8fcae8a631c8ff Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 25 Nov 2022 15:44:24 -0800 Subject: [PATCH 55/88] add test for #587 --- test/test-api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test-api.c b/test/test-api.c index f202e7c1..c6d289de 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -205,6 +205,12 @@ int main(void) { } result = ok; } + CHECK_BODY("malloc_aligned11") { + mi_heap_t* heap = mi_heap_new(); + void* p = mi_heap_malloc_aligned(heap, 33554426, 8); + result = mi_heap_contains_block(heap, p); + mi_heap_destroy(heap); + } CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; From 8098040c23d0267cc3fd7ce2aebb6410fdf991e8 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 25 Nov 2022 16:38:20 -0800 Subject: [PATCH 56/88] add pointer validity check on malloc_size when overriding on macOSX; issue #638 --- src/alloc-override.c | 8 +++++++- src/alloc-posix.c | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index 9534e9d5..70cf3367 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -51,11 +51,17 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; #define MI_FORWARD02(fun,x,y) { fun(x,y); } #endif + #if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE) // define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for // functions that are interposed (or the interposing does not work) #define MI_OSX_IS_INTERPOSED + mi_decl_externc static size_t mi_malloc_size_checked(void *p) { + if (!mi_is_in_heap_region(p)) return 0; + return mi_usable_size(p); + } + // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` // See: struct mi_interpose_s { @@ -76,7 +82,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), - MI_INTERPOSE_MI(malloc_size), + MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_MI(malloc_good_size), #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 MI_INTERPOSE_MI(aligned_alloc), diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 57e15d05..e73628f4 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -33,12 +33,12 @@ terms of the MIT license. 
A copy of the license can be found in the file mi_decl_nodiscard size_t mi_malloc_size(const void* p) mi_attr_noexcept { - //if (!mi_is_in_heap_region(p)) return 0; + // if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } mi_decl_nodiscard size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { - //if (!mi_is_in_heap_region(p)) return 0; + // if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } From 6988bbcca04e0c10db1da642676068dd89895f3d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 27 Nov 2022 12:01:56 -0800 Subject: [PATCH 57/88] fix duplicate definition (issue #652) --- test/main-override.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/test/main-override.cpp b/test/main-override.cpp index f1c9a10b..9704a760 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -254,19 +254,6 @@ static void heap_thread_free_huge() { } } -static void heap_thread_free_huge_worker() { - mi_free(shared_p); -} - -static void heap_thread_free_huge() { - for (int i = 0; i < 10; i++) { - shared_p = mi_malloc(1024 * 1024 * 1024); - auto t1 = std::thread(heap_thread_free_large_worker); - t1.join(); - } -} - - static void test_mt_shutdown() { const int threads = 5; From c1299484def0a5282dcdbe53da79454aafd949ba Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 27 Nov 2022 13:00:15 -0800 Subject: [PATCH 58/88] refactor C++ STL allocator definitions (pr #651) --- include/mimalloc.h | 60 ++++++++++++++---------------- test/main-override.cpp | 4 +-- 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 8fd2eedd..d70d28ed 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -441,6 +441,8 @@ template<class T> struct _mi_stl_allocator_common { template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> { using typename _mi_stl_allocator_common<T>::size_type; + using typename _mi_stl_allocator_common<T>::value_type; + using typename _mi_stl_allocator_common<T>::pointer; template<class U> struct rebind { typedef mi_stl_allocator<U> other; }; mi_stl_allocator() mi_attr_noexcept = default; @@ -464,19 +466,17 @@ template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> { template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; } template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; } + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #include <memory> // std::shared_ptr +// Common base class for STL allocators in a specific heap template<class T> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> { using typename _mi_stl_allocator_common<T>::size_type; using typename _mi_stl_allocator_common<T>::value_type; using typename _mi_stl_allocator_common<T>::pointer; - _mi_heap_stl_allocator_common() { mi_heap_t* hp = mi_heap_new(); this->heap.reset(hp, heap_delete); } _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete or destroy the passed in heap */ - _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } - template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U>& x) mi_attr_noexcept : heap(x.heap) { } #if (__cplusplus >= 201703L) // C++17 mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } @@ -490,58 +490,54 @@ template<class T> struct _mi_heap_stl_allocator_common : public _mi_stl_allocato #endif void
collect(bool force) { mi_heap_collect(this->heap.get(), force); } + template bool is_equal(const _mi_heap_stl_allocator_common& x) { return (this->heap == x.heap); } protected: std::shared_ptr heap; + template friend struct _mi_heap_stl_allocator_common; + + _mi_heap_stl_allocator_common(bool destroy) { + mi_heap_t* hp = mi_heap_new(); + this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ + } + _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } +private: static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } + static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } } }; // STL allocator allocation in a specific heap template struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; + mi_heap_stl_allocator() : _mi_heap_stl_allocator_common(false) { } /* delete on destruction */ + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } /* no delete or destroy on the passed in heap */ + template mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } - template struct rebind { typedef mi_heap_stl_allocator other; }; - - using _mi_heap_stl_allocator_common::_mi_heap_stl_allocator_common; mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { mi_free(p); } - -protected: - template - friend struct mi_heap_stl_allocator; - template - friend bool operator==(const mi_heap_stl_allocator& first, const mi_heap_stl_allocator& second) mi_attr_noexcept; - template - friend bool operator!=(const mi_heap_stl_allocator& first, const mi_heap_stl_allocator& second) mi_attr_noexcept; + template struct rebind { typedef mi_heap_stl_allocator other; }; }; -template bool operator==(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (x.heap == y.heap); } -template bool operator!=(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (x.heap != y.heap); } +template bool operator==(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (x.is_equal(y)); } +template bool operator!=(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (!x.is_equal(y)); } // STL allocator allocation in a specific heap, where `free` does nothing and // the heap is destroyed in one go on destruction -- use with care! template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; + mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common(true) { } /* destroy on destruction */ + template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } - template struct rebind { typedef mi_heap_destroy_stl_allocator other; }; - - using _mi_heap_stl_allocator_common::_mi_heap_stl_allocator_common; mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { /* do nothing as we destroy the heap on destruct. 
*/ } - -protected: - template - friend struct mi_heap_destroy_stl_allocator; - template - friend bool operator==(const mi_heap_destroy_stl_allocator& first, const mi_heap_destroy_stl_allocator& second) mi_attr_noexcept; - template - friend bool operator!=(const mi_heap_destroy_stl_allocator& first, const mi_heap_destroy_stl_allocator& second) mi_attr_noexcept; + template struct rebind { typedef mi_heap_destroy_stl_allocator other; }; }; -template bool operator==(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (x.heap == y.heap); } -template bool operator!=(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (x.heap != y.heap); } +template bool operator==(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (x.is_equal(y)); } +template bool operator!=(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (!x.is_equal(y)); } #endif // C++11 diff --git a/test/main-override.cpp b/test/main-override.cpp index 81f57298..63bf20d8 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -181,7 +181,7 @@ static void strdup_test() { // Issue #202 static void heap_no_delete_worker() { mi_heap_t* heap = mi_heap_new(); - void* q = mi_heap_malloc(heap, 1024); + void* q = mi_heap_malloc(heap, 1024); (void)(q); // mi_heap_delete(heap); // uncomment to prevent assertion } @@ -245,7 +245,7 @@ static void heap_thread_free_huge_worker() { static void heap_thread_free_huge() { for (int i = 0; i < 10; i++) { shared_p = mi_malloc(1024 * 1024 * 1024); - auto t1 = std::thread(heap_thread_free_large_worker); + auto t1 = std::thread(heap_thread_free_huge_worker); t1.join(); } } From 7da4a34dc1d101310ccfab3acfb76388847f9fc3 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Mon, 28 Nov 2022 11:55:58 +0100 Subject: [PATCH 59/88] Make "destroy" a compile-time constant + fix const allocator comparisons --- include/mimalloc.h | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index d70d28ed..81d387c3 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -471,7 +471,7 @@ template bool operator!=(const mi_stl_allocator& , const #include // std::shared_ptr // Common base class for STL allocators in a specific heap -template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { +template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { using typename _mi_stl_allocator_common::size_type; using typename _mi_stl_allocator_common::value_type; using typename _mi_stl_allocator_common::pointer; @@ -490,18 +490,18 @@ template struct _mi_heap_stl_allocator_common : public _mi_stl_allocato #endif void collect(bool force) { mi_heap_collect(this->heap.get(), force); } - template bool is_equal(const _mi_heap_stl_allocator_common& x) { return (this->heap == x.heap); } + template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } protected: std::shared_ptr heap; - template friend struct _mi_heap_stl_allocator_common; + template friend struct _mi_heap_stl_allocator_common; - _mi_heap_stl_allocator_common(bool destroy) { + _mi_heap_stl_allocator_common() { mi_heap_t* hp = mi_heap_new(); this->heap.reset(hp, (destroy ? 
&heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ } _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } - template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } private: static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } @@ -509,11 +509,10 @@ private: }; // STL allocator allocation in a specific heap -template struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common { - using typename _mi_heap_stl_allocator_common::size_type; - mi_heap_stl_allocator() : _mi_heap_stl_allocator_common(false) { } /* delete on destruction */ - mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } /* no delete or destroy on the passed in heap */ - template mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } +template struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common { + using typename _mi_heap_stl_allocator_common::size_type; + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } /* no delete or destroy on the passed in heap */ + template mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { mi_free(p); } @@ -526,10 +525,9 @@ template bool operator!=(const mi_heap_stl_allocator& x, // STL allocator allocation in a specific heap, where `free` does nothing and // the heap is destroyed in one go on destruction -- use with care! -template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { - using typename _mi_heap_stl_allocator_common::size_type; - mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common(true) { } /* destroy on destruction */ - template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } +template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { + using typename _mi_heap_stl_allocator_common::size_type; + template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { /* do nothing as we destroy the heap on destruct. */ } From be2bc9e7cad7002794f7bcc22e6d7af9f45149ed Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 28 Nov 2022 09:15:16 -0800 Subject: [PATCH 60/88] better attributes on new/delete overrides for Microsoft visual C++ --- include/mimalloc-new-delete.h | 17 +++++++++++++---- include/mimalloc.h | 4 +++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/mimalloc-new-delete.h b/include/mimalloc-new-delete.h index 1c12fad2..7af13032 100644 --- a/include/mimalloc-new-delete.h +++ b/include/mimalloc-new-delete.h @@ -22,17 +22,26 @@ terms of the MIT license. 
A copy of the license can be found in the file #include #include + #if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_) + // stay consistent with VCRT definitions + #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n) + #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n) + #else + #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict + #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict + #endif + void operator delete(void* p) noexcept { mi_free(p); }; void operator delete[](void* p) noexcept { mi_free(p); }; void operator delete (void* p, const std::nothrow_t&) noexcept { mi_free(p); } void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); } - void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } - void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } + mi_decl_new(n) void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } + mi_decl_new(n) void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } - void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } - void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + mi_decl_new_nothrow(n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + mi_decl_new_nothrow(n) void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); }; diff --git a/include/mimalloc.h b/include/mimalloc.h index d70d28ed..ccd38ab5 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -28,8 +28,10 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_nodiscard [[nodiscard]] #elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl #define mi_decl_nodiscard __attribute__((warn_unused_result)) +#elif defined(_HAS_NODISCARD) + #define mi_decl_nodiscard _NODISCARD #elif (_MSC_VER >= 1700) - #define mi_decl_nodiscard _Check_return_ + #define mi_decl_nodiscard _Check_return_ #else #define mi_decl_nodiscard #endif From e42a22c9ca58ac90d017de8405c188996ed94252 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 28 Nov 2022 09:59:48 -0800 Subject: [PATCH 61/88] fix warnings --- src/options.c | 3 ++- test/test-wrong.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/options.c b/src/options.c index 4fa5d5f8..4add09e0 100644 --- a/src/options.c +++ b/src/options.c @@ -106,7 +106,8 @@ void _mi_options_init(void) { for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; long l = mi_option_get(option); MI_UNUSED(l); // initialize - if (option != mi_option_verbose) { + // if (option != mi_option_verbose) + { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } diff --git a/test/test-wrong.c b/test/test-wrong.c index 8bf7767e..6c3d5a74 100644 --- a/test/test-wrong.c +++ b/test/test-wrong.c @@ -31,9 +31,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif int main(int argc, char** argv) { - int* p = mi(malloc)(3*sizeof(int)); + int* p = (int*)mi(malloc)(3*sizeof(int)); - int* r = mi_malloc_aligned(8,16); + int* r = (int*)mi_malloc_aligned(8,16); mi_free(r); // illegal byte wise read @@ -42,7 +42,7 @@ int main(int argc, char** argv) { mi(free)(c); // undefined access - int* q = mi(malloc)(sizeof(int)); + int* q = (int*)mi(malloc)(sizeof(int)); printf("undefined: %d\n", *q); // illegal int read From 76db72814c0af5e8974b3fc5c4b25676dd953559 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 28 Nov 2022 10:54:45 -0800 Subject: [PATCH 62/88] fix unused parameter warning in clang --- include/mimalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index ccd38ab5..3828bc25 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -534,7 +534,7 @@ template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_all template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } - void deallocate(T* p, size_type) { /* do nothing as we destroy the heap on destruct. */ } + void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ } template struct rebind { typedef mi_heap_destroy_stl_allocator other; }; }; From 10981ab1220400d65eab6429537344ac1b7cf7f0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 28 Nov 2022 10:55:19 -0800 Subject: [PATCH 63/88] add initial support for using mimalloc with address sanitizer support; use -DMI_ASAN=ON --- CMakeLists.txt | 23 +++++++++++++++++++++++ include/mimalloc-track.h | 23 +++++++++++++++++++++-- src/alloc-aligned.c | 2 +- src/alloc.c | 2 +- src/init.c | 1 + 5 files changed, 47 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4eebf2ca..4f118786 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ option(MI_OVERRIDE "Override the standard malloc interface (e.g. 
define option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF) option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) option(MI_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) +option(MI_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) @@ -139,6 +140,25 @@ if(MI_VALGRIND) endif() endif() +if(MI_ASAN) + if (MI_VALGRIND) + set(MI_ASAN OFF) + message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled (MI_ASAN=OFF)") + else() + CHECK_INCLUDE_FILES("sanitizer/asan_interface.h" MI_HAS_ASANH) + if (NOT MI_HAS_ASANH) + set(MI_ASAN OFF) + message(WARNING "Cannot find the 'sanitizer/asan_interface.h' -- install address sanitizer support first") + message(STATUS "Compile **without** address sanitizer support (MI_ASAN=OFF)") + else() + message(STATUS "Compile with address sanitizer support (MI_ASAN=ON)") + list(APPEND mi_defines MI_ASAN=1) + list(APPEND mi_cflags -fsanitize=address) + list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=address) + endif() + endif() +endif() + if(MI_SEE_ASM) message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)") list(APPEND mi_cflags -save-temps) @@ -296,6 +316,9 @@ if(MI_SECURE) endif() if(MI_VALGRIND) set(mi_basename "${mi_basename}-valgrind") +endif() +if(MI_ASAN) + set(mi_basename "${mi_basename}-asan") endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$")) diff --git a/include/mimalloc-track.h b/include/mimalloc-track.h index bb9df4fa..3bb9527c 100644 --- a/include/mimalloc-track.h +++ b/include/mimalloc-track.h @@ -10,12 +10,13 @@ terms of the MIT license. A copy of the license can be found in the file // ------------------------------------------------------ // Track memory ranges with macros for tools like Valgrind -// or other memory checkers. +// address sanitizer, or other memory checkers. // ------------------------------------------------------ #if MI_VALGRIND #define MI_TRACK_ENABLED 1 +#define MI_TRACK_TOOL "valgrind" #include #include @@ -23,17 +24,35 @@ terms of the MIT license. 
 #define mi_track_malloc(p,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
 #define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
 #define mi_track_free(p) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
+#define mi_track_free_size(p,_size) mi_track_free(p)
 #define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size)
 #define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size)
 #define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size)

+#elif MI_ASAN
+
+#define MI_TRACK_ENABLED 1
+#define MI_TRACK_TOOL "asan"
+
+#include <sanitizer/asan_interface.h>
+
+#define mi_track_malloc(p,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_resize(p,oldsize,newsize) ASAN_POISON_MEMORY_REGION(p,oldsize); ASAN_UNPOISON_MEMORY_REGION(p,newsize)
+#define mi_track_free(p) ASAN_POISON_MEMORY_REGION(p,mi_usable_size(p))
+#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size)
+#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size)
+
 #else

 #define MI_TRACK_ENABLED 0
+#define MI_TRACK_TOOL "none"

 #define mi_track_malloc(p,size,zero)
 #define mi_track_resize(p,oldsize,newsize)
-#define mi_track_free(p)
+#define mi_track_free(p,size)
+#define mi_track_free_size(p,_size)
 #define mi_track_mem_defined(p,size)
 #define mi_track_mem_undefined(p,size)
 #define mi_track_mem_noaccess(p,size)
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 73d5524c..04528d9d 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -80,7 +80,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
   #if MI_TRACK_ENABLED
   if (p != aligned_p) {
-    mi_track_free(p);
+    mi_track_free_size(p, oversize);
     mi_track_malloc(aligned_p, size, zero);
   }
   else {
diff --git a/src/alloc.c b/src/alloc.c
index 7095dc67..01a0d4a4 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -697,7 +697,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
   const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0)
   if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) {  // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0)
     // todo: adjust potential padding to reflect the new size?
-    mi_track_free(p);
+    mi_track_free_size(p, size);
     mi_track_malloc(p,newsize,true);
     return p;  // reallocation still fits and not more than 50% waste
   }
diff --git a/src/init.c b/src/init.c
index 3f71fa01..66c7e74b 100644
--- a/src/init.c
+++ b/src/init.c
@@ -558,6 +558,7 @@ void mi_process_init(void) mi_attr_noexcept {
   _mi_verbose_message("debug level : %d\n", MI_DEBUG);
   #endif
   _mi_verbose_message("secure level: %d\n", MI_SECURE);
+  _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL);
   mi_thread_init();

   #if defined(_WIN32) && !defined(MI_SHARED_LIB)
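The MI_ASAN branch above maps mimalloc's tracking hooks onto the sanitizer's poison/unpoison protocol: memory handed out by the allocator is unpoisoned, and freed or trimmed memory is poisoned so any later access is reported. A minimal standalone sketch of that protocol (not part of the patch), assuming a compiler with -fsanitize=address and the sanitizer/asan_interface.h header:

// Sketch only: mirrors what the mi_track_* macros expand to under MI_ASAN.
// Compile with: cc -fsanitize=address sketch.c
#include <sanitizer/asan_interface.h>
#include <stdlib.h>

int main(void) {
  char* p = malloc(64);
  ASAN_UNPOISON_MEMORY_REGION(p, 64);  // like mi_track_malloc: block becomes accessible
  p[0] = 'x';                          // ok
  ASAN_POISON_MEMORY_REGION(p, 64);    // like mi_track_free_size: accesses now trap
  // p[0] = 'y';                       // would be reported as use of poisoned memory
  ASAN_UNPOISON_MEMORY_REGION(p, 64);  // restore before handing the block back
  free(p);
  return 0;
}

Configuring with -DMI_ASAN=ON then defines MI_ASAN=1, compiles and links with -fsanitize=address, and, per the basename logic in the CMake hunk above, names the resulting library with an -asan suffix (e.g. libmimalloc-asan).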
From ddc984101904720615d8a07fef0b4984116d902b Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 28 Nov 2022 11:12:35 -0800
Subject: [PATCH 64/88] fix parameters for mi_track_free

---
 include/mimalloc-track.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mimalloc-track.h b/include/mimalloc-track.h
index 3bb9527c..ed41375b 100644
--- a/include/mimalloc-track.h
+++ b/include/mimalloc-track.h
@@ -51,7 +51,7 @@ terms of the MIT license. A copy of the license can be found in the file

 #define mi_track_malloc(p,size,zero)
 #define mi_track_resize(p,oldsize,newsize)
-#define mi_track_free(p,size)
+#define mi_track_free(p)
 #define mi_track_free_size(p,_size)
 #define mi_track_mem_defined(p,size)
 #define mi_track_mem_undefined(p,size)
 #define mi_track_mem_noaccess(p,size)

From 745a34f4750fa5982255e1ae67b09661c2f27748 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin
Date: Sat, 3 Dec 2022 00:23:43 +0100
Subject: [PATCH 65/88] Fix whitespace

This mostly deletes trailing spaces.

Signed-off-by: Johannes Schindelin
---
 CMakeLists.txt                   |  32 +++---
 azure-pipelines.yml              |   2 +-
 cmake/mimalloc-config.cmake      |   8 +-
 doc/mimalloc-doc.h               |  12 +--
 docs/annotated.html              |   8 +-
 docs/bench.html                  |   8 +-
 docs/build.html                  |   8 +-
 docs/classes.html                |   8 +-
 docs/doxygen.css                 |  59 ++++++-----
 docs/environment.html            |   8 +-
 docs/functions.html              |   8 +-
 docs/functions_vars.html         |   8 +-
 docs/group__aligned.html         |  10 +-
 docs/group__analysis.html        |  10 +-
 docs/group__cpp.html             |  10 +-
 docs/group__extended.html        |  10 +-
 docs/group__heap.html            |  10 +-
 docs/group__malloc.html          |  10 +-
 docs/group__options.html         |  10 +-
 docs/group__posix.html           |  10 +-
 docs/group__typed.html           |  10 +-
 docs/group__zeroinit.html        |  10 +-
 docs/index.html                  |   8 +-
 docs/mimalloc-doc_8h_source.html |   8 +-
 docs/modules.html                |   8 +-
 docs/navtree.css                 |   3 +-
 docs/overrides.html              |   8 +-
 docs/pages.html                  |   8 +-
 docs/search/search.css           |  13 ++-
 docs/search/searchdata.js        |   1 -
 docs/tabs.css                    |   1 -
 docs/using.html                  |   8 +-
 ide/vs2017/mimalloc.sln          | 142 +++++++++++++--------------
 ide/vs2019/mimalloc.sln          | 162 +++++++++++++++----------------
 ide/vs2022/mimalloc.sln          | 162 +++++++++++++++----------------
 include/mimalloc-atomic.h        |  10 +-
 include/mimalloc-internal.h      |  38 ++++----
 include/mimalloc-new-delete.h    |   4 +-
 include/mimalloc-track.h         |  14 +--
 include/mimalloc-types.h         |  22 ++---
 include/mimalloc.h               |  26 ++---
 readme.md                        |  49 +++++-----
 src/alloc-aligned.c              |  17 ++--
 src/alloc-override-osx.c         |  24 ++---
 src/alloc-override.c             |  38 ++++----
 src/alloc-posix.c                |   4 +-
 src/alloc.c                      |  60 ++++++------
 src/arena.c                      |  16 +--
 src/bitmap.c                     |  22 ++---
 src/heap.c                       |  20 ++--
 src/init.c                       |  32 +++---
 src/options.c                    |  20 ++--
 src/os.c                         |  74 +++++++-------
 src/page-queue.c                 |   4 +-
 src/page.c                       |  28 +++---
 src/random.c                     |  10 +-
 src/region.c                     |  52 +++++-----
 src/segment.c                    |  40 ++++----
 src/stats.c                      |  37 ++++---
 test/main-override-static.c      |  13 ++-
 test/main-override.cpp           |   4 +-
 test/test-api-fill.c             |   4 +-
 test/test-api.c                  |  22 ++---
 test/test-stress.c               |   4 +-
 test/test-wrong.c                |  10 +-
 test/testhelper.h                |  10 +-
 66 files changed, 760 insertions(+), 769 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f118786..97a4984a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ option(MI_ASAN "Compile with address sanitizer support (adds a smal
 option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
 option(MI_SEE_ASM "Generate assembly files" OFF)
 option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
-option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) 
+option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
 option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
 option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
 option(MI_BUILD_SHARED "Build shared library" ON)
@@ -56,7 +56,7 @@ set(mi_sources
 # Convenience: set default build type depending on the build directory
 #
----------------------------------------------------------------------------- -message(STATUS "") +message(STATUS "") if (NOT CMAKE_BUILD_TYPE) if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL) message(STATUS "No build type selected, default to: Debug") @@ -88,7 +88,7 @@ if(MI_OVERRIDE) # use zone's on macOS message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)") list(APPEND mi_sources src/alloc-override-osx.c) - list(APPEND mi_defines MI_OSX_ZONE=1) + list(APPEND mi_defines MI_OSX_ZONE=1) if (NOT MI_OSX_INTERPOSE) message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)") endif() @@ -201,12 +201,12 @@ if(MI_DEBUG_TSAN) list(APPEND mi_cflags -fsanitize=thread -g -O1) list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=thread) else() - message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)") - endif() + message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)") + endif() endif() if(MI_DEBUG_UBSAN) - if(CMAKE_BUILD_TYPE MATCHES "Debug") + if(CMAKE_BUILD_TYPE MATCHES "Debug") if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") message(STATUS "Build with undefined-behavior sanitizer (MI_DEBUG_UBSAN=ON)") list(APPEND mi_cflags -fsanitize=undefined -g -fno-sanitize-recover=undefined) @@ -216,10 +216,10 @@ if(MI_DEBUG_UBSAN) set(MI_USE_CXX "ON") endif() else() - message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)") - endif() + message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)") + endif() else() - message(WARNING "Can only use thread sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})") + message(WARNING "Can only use thread sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})") endif() endif() @@ -240,7 +240,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) if(NOT MI_USE_CXX) list(APPEND mi_cflags -Wstrict-prototypes) - endif() + endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline) endif() @@ -272,7 +272,7 @@ if(WIN32) else() set(pc_libraries "") find_library(MI_LIBPTHREAD pthread) - if (MI_LIBPTHREAD) + if (MI_LIBPTHREAD) list(APPEND mi_libraries ${MI_LIBPTHREAD}) set(pc_libraries "${pc_libraries} -pthread") endif() @@ -282,10 +282,10 @@ else() set(pc_libraries "${pc_libraries} -lrt") endif() find_library(MI_LIBATOMIC atomic) - if (NOT MI_LIBATOMIC AND MI_USE_LIBATOMIC) + if (NOT MI_LIBATOMIC AND MI_USE_LIBATOMIC) set(MI_LIBATOMIC atomic) endif() - if (MI_LIBATOMIC) + if (MI_LIBATOMIC) list(APPEND mi_libraries ${MI_LIBATOMIC}) set(pc_libraries "${pc_libraries} -latomic") endif() @@ -302,7 +302,7 @@ set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}") # are either installed at top level, or use versioned directories for side-by-side installation (default) if (MI_INSTALL_TOPLEVEL) set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}") - set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}") + set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}") set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc") else() set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}/mimalloc-${mi_version}") # for static library and object files @@ -316,7 +316,7 @@ if(MI_SECURE) endif() if(MI_VALGRIND) set(mi_basename "${mi_basename}-valgrind") -endif() +endif() if(MI_ASAN) set(mi_basename "${mi_basename}-asan") endif() @@ 
-383,7 +383,7 @@ if(MI_BUILD_SHARED) install(FILES "$/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" DESTINATION ${mi_install_libdir}) endif() - install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY) + install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY) install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) endif() diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 57cabbef..5900b225 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,7 +13,7 @@ trigger: include: - v* -jobs: +jobs: - job: displayName: Windows pool: diff --git a/cmake/mimalloc-config.cmake b/cmake/mimalloc-config.cmake index 8a28e37e..a49b02a2 100644 --- a/cmake/mimalloc-config.cmake +++ b/cmake/mimalloc-config.cmake @@ -2,13 +2,13 @@ include(${CMAKE_CURRENT_LIST_DIR}/mimalloc.cmake) get_filename_component(MIMALLOC_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH) # one up from the cmake dir, e.g. /usr/local/lib/cmake/mimalloc-2.0 get_filename_component(MIMALLOC_VERSION_DIR "${CMAKE_CURRENT_LIST_DIR}" NAME) string(REPLACE "/lib/cmake" "/lib" MIMALLOC_LIBRARY_DIR "${MIMALLOC_CMAKE_DIR}") -if("${MIMALLOC_VERSION_DIR}" EQUAL "mimalloc") +if("${MIMALLOC_VERSION_DIR}" EQUAL "mimalloc") # top level install string(REPLACE "/lib/cmake" "/include" MIMALLOC_INCLUDE_DIR "${MIMALLOC_CMAKE_DIR}") set(MIMALLOC_OBJECT_DIR "${MIMALLOC_LIBRARY_DIR}") -else() +else() # versioned string(REPLACE "/lib/cmake/" "/include/" MIMALLOC_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}") - string(REPLACE "/lib/cmake/" "/lib/" MIMALLOC_OBJECT_DIR "${CMAKE_CURRENT_LIST_DIR}") -endif() + string(REPLACE "/lib/cmake/" "/lib/" MIMALLOC_OBJECT_DIR "${CMAKE_CURRENT_LIST_DIR}") +endif() set(MIMALLOC_TARGET_DIR "${MIMALLOC_LIBRARY_DIR}") # legacy diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index e0f1cae1..9525c9ea 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -40,7 +40,7 @@ Notable aspects of the design include: per mimalloc page, but for each page we have multiple free lists. In particular, there is one list for thread-local `free` operations, and another one for concurrent `free` operations. Free-ing from another thread can now be a single CAS without needing - sophisticated coordination between threads. Since there will be + sophisticated coordination between threads. Since there will be thousands of separate free lists, contention is naturally distributed over the heap, and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding @@ -414,7 +414,7 @@ void mi_register_error(mi_error_fun* errfun, void* arg); bool mi_is_in_heap_region(const void* p); /// Reserve OS memory for use by mimalloc. Reserved areas are used -/// before allocating from the OS again. By reserving a large area upfront, +/// before allocating from the OS again. By reserving a large area upfront, /// allocation can be more efficient, and can be better managed on systems /// without `mmap`/`VirtualAlloc` (like WASM for example). /// @param size The size to reserve. @@ -423,7 +423,7 @@ bool mi_is_in_heap_region(const void* p); /// @return \a 0 if successful, and an error code otherwise (e.g. `ENOMEM`). int mi_reserve_os_memory(size_t size, bool commit, bool allow_large); -/// Manage a particular memory area for use by mimalloc. +/// Manage a particular memory area for use by mimalloc. 
/// This is just like `mi_reserve_os_memory` except that the area should already be
/// allocated in some manner and available for use by mimalloc.
/// @param start Start of the memory area
@@ -499,7 +499,7 @@ void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_m
 /// \{

 /// The maximum supported alignment size (currently 1MiB).
-#define MI_ALIGNMENT_MAX (1024*1024UL) 
+#define MI_ALIGNMENT_MAX (1024*1024UL)

 /// Allocate \a size bytes aligned by \a alignment.
 /// @param size number of bytes to allocate.
@@ -813,7 +813,7 @@ typedef enum mi_option_e {
   mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free.
   mi_option_abandoned_page_reset, ///< Reset free page memory when a thread terminates.
   mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes; Use 0 to use the actual detected NUMA nodes at runtime.
-  mi_option_eager_commit_delay, ///< the first N segments per thread are not eagerly committed (=1). 
+  mi_option_eager_commit_delay, ///< the first N segments per thread are not eagerly committed (=1).
   mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory
   mi_option_limit_os_alloc, ///< If set to 1, do not use OS memory for allocation (but only pre-reserved arenas)
@@ -1097,7 +1097,7 @@ or via environment variables.
   `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
   of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
   and allocate just a little to take up space in the huge OS page area (which cannot be reset).
-- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where N is the numa node. This reserves the huge pages at a specific numa node. 
+- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where N is the numa node. This reserves the huge pages at a specific numa node.
   (`N` is -1 by default to reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected))

 Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
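The documentation hunks above touch `mi_reserve_os_memory`, `MI_ALIGNMENT_MAX`, and the runtime option enum; a brief usage sketch of these public APIs (illustrative only, not part of the patch):

// Sketch only: reserving an upfront arena and setting an option programmatically.
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Reserve 64 MiB upfront; reserved areas are used before allocating from
  // the OS again (returns 0 on success, an error code such as ENOMEM otherwise).
  if (mi_reserve_os_memory(64 * 1024 * 1024, /*commit*/ true, /*allow_large*/ true) != 0) {
    fprintf(stderr, "reservation failed\n");
  }
  // Equivalent to the MIMALLOC_EAGER_COMMIT_DELAY environment variable above.
  mi_option_set(mi_option_eager_commit_delay, 1);

  // Aligned allocation; the alignment must stay below MI_ALIGNMENT_MAX.
  void* p = mi_malloc_aligned(1024, 64);
  mi_free(p);
  return 0;
}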
[The remaining hunks of this patch strip trailing whitespace and end-of-file blank lines from the generated documentation: docs/annotated.html, docs/bench.html, docs/build.html, docs/classes.html, docs/doxygen.css, docs/environment.html, docs/functions.html, docs/functions_vars.html, the docs/group__*.html module pages, docs/index.html, docs/mimalloc-doc_8h_source.html, docs/modules.html, docs/navtree.css, docs/overrides.html, docs/pages.html, docs/search/search.css, docs/search/searchdata.js, docs/tabs.css, and docs/using.html. The HTML hunks were content-stripped during extraction and are elided here; the CSS and JS hunks only delete trailing spaces and final blank lines.]
diff --git a/ide/vs2017/mimalloc.sln b/ide/vs2017/mimalloc.sln
index aeab6b88..7dbf53e1 100644
diff --git a/ide/vs2019/mimalloc.sln b/ide/vs2019/mimalloc.sln
index fcb938a4..6ff01d3b 100644
diff --git a/ide/vs2022/mimalloc.sln b/ide/vs2022/mimalloc.sln
index fcb938a4..6ff01d3b 100644
[Each solution file is rewritten wholesale: every line is deleted and re-added without trailing whitespace. The project entries (mimalloc, mimalloc-test, mimalloc-override, mimalloc-override-test, mimalloc-test-stress, plus mimalloc-test-api for vs2019/vs2022) and their Debug/Release x86/x64 configurations are otherwise unchanged.]
+ {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.Build.0 = Debug|x64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.ActiveCfg = Debug|Win32 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.Build.0 = Debug|Win32 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.ActiveCfg = Release|x64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.Build.0 = Release|x64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.ActiveCfg = Release|Win32 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.Build.0 = Release|Win32 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {4297F93D-486A-4243-995F-7D32F59AE82A} + EndGlobalSection +EndGlobal diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 7ad5da58..c66f8049 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -11,9 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file // -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. -// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. -// This is why we try to use only `uintptr_t` and `*` as atomic types. -// To gain better insight in the range of used atomics, we use explicitly named memory order operations +// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// This is why we try to use only `uintptr_t` and `*` as atomic types. +// To gain better insight in the range of used atomics, we use explicitly named memory order operations // instead of passing the memory order as a parameter. // ----------------------------------------------------------------------------------------------- @@ -30,7 +30,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #elif defined(_MSC_VER) // Use MSVC C wrapper for C11 atomics -#define _Atomic(tp) tp +#define _Atomic(tp) tp #define MI_ATOMIC_VAR_INIT(x) x #define mi_atomic(name) mi_atomic_##name #define mi_memory_order(name) mi_memory_order_##name @@ -275,7 +275,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { return (intptr_t)mi_atomic_addi(p, -sub); } -// Yield +// Yield #if defined(__cplusplus) #include static inline void mi_atomic_yield(void) { diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cb4c79af..fc18a8f2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -41,10 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__cplusplus) #define mi_decl_externc extern "C" #else -#define mi_decl_externc +#define mi_decl_externc #endif -#if !defined(_WIN32) && !defined(__wasi__) +#if !defined(_WIN32) && !defined(__wasi__) #define MI_USE_PTHREADS #include #endif @@ -336,14 +336,14 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #if defined(MI_MALLOC_OVERRIDE) #if defined(__APPLE__) // macOS -#define MI_TLS_SLOT 89 // seems unused? -// #define MI_TLS_RECURSE_GUARD 1 +#define MI_TLS_SLOT 89 // seems unused? +// #define MI_TLS_RECURSE_GUARD 1 // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) // see #elif defined(__OpenBSD__) -// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) +// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) // see -#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) +#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) // #elif defined(__DragonFly__) // #warning "mimalloc is not working correctly on DragonFly yet." // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) @@ -383,7 +383,7 @@ static inline mi_heap_t* mi_get_default_heap(void) { #ifdef __GNUC__ __asm(""); // prevent conditional load of the address of _mi_heap_empty #endif - heap = (mi_heap_t*)&_mi_heap_empty; + heap = (mi_heap_t*)&_mi_heap_empty; } return heap; #elif defined(MI_TLS_PTHREAD_SLOT_OFS) @@ -393,7 +393,7 @@ static inline mi_heap_t* mi_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #else - #if defined(MI_TLS_RECURSE_GUARD) + #if defined(MI_TLS_RECURSE_GUARD) if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); #endif return _mi_heap_default; @@ -437,7 +437,7 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) { // Segment that contains the pointer // Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), -// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; +// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; // therefore we align one byte before `p`. 
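
The align-down trick in the comment above is easy to get wrong, so here is an illustrative stand-alone sketch (editorial example, not part of the patch; `SEGMENT_SIZE` is a stand-in for `MI_SEGMENT_SIZE` and is assumed to be a power of two):

```c
#include <assert.h>
#include <stdint.h>

#define SEGMENT_SIZE ((uintptr_t)1 << 22)  // stand-in; must be a power of two

// Align `p - 1` down to a segment boundary: for a block that sits exactly at
// N*SEGMENT_SIZE this lands in the previous segment, whose info header lies
// SEGMENT_SIZE bytes before the block -- which is exactly what is wanted here.
static inline void* ptr_segment(const void* p) {
  assert(p != NULL);
  return (void*)(((uintptr_t)p - 1) & ~(SEGMENT_SIZE - 1));
}
```
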
static inline mi_segment_t* _mi_ptr_segment(const void* p) { mi_assert_internal(p != NULL); @@ -660,7 +660,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl next = (mi_block_t*)block->next; #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); - return next; + return next; } static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { @@ -756,7 +756,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { return (uintptr_t)NtCurrentTeb(); } -// We use assembly for a fast thread id on the main platforms. The TLS layout depends on +// We use assembly for a fast thread id on the main platforms. The TLS layout depends on // both the OS and libc implementation so we use specific tests for each main platform. // If you test on another platform and it works please send a PR :-) // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. @@ -869,7 +869,7 @@ static inline size_t mi_ctz(uintptr_t x) { #endif } -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) #include // LONG_MAX #define MI_HAVE_FAST_BITSCAN @@ -880,7 +880,7 @@ static inline size_t mi_clz(uintptr_t x) { _BitScanReverse(&idx, x); #else _BitScanReverse64(&idx, x); -#endif +#endif return ((MI_INTPTR_BITS - 1) - idx); } static inline size_t mi_ctz(uintptr_t x) { @@ -890,7 +890,7 @@ static inline size_t mi_ctz(uintptr_t x) { _BitScanForward(&idx, x); #else _BitScanForward64(&idx, x); -#endif +#endif return idx; } @@ -920,7 +920,7 @@ static inline size_t mi_clz32(uint32_t x) { } static inline size_t mi_clz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; + if (x==0) return MI_INTPTR_BITS; #if (MI_INTPTR_BITS <= 32) return mi_clz32((uint32_t)x); #else @@ -951,9 +951,9 @@ static inline size_t mi_bsr(uintptr_t x) { // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. // -// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if -// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support -// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. +// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if +// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support +// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. // --------------------------------------------------------------------------------- #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) @@ -988,7 +988,7 @@ static inline void _mi_memzero(void* dst, size_t n) { // ------------------------------------------------------------------------------- -// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned +// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned // This is used for example in `mi_realloc`. // ------------------------------------------------------------------------------- diff --git a/include/mimalloc-new-delete.h b/include/mimalloc-new-delete.h index 7af13032..c16f4a66 100644 --- a/include/mimalloc-new-delete.h +++ b/include/mimalloc-new-delete.h @@ -24,7 +24,7 @@ terms of the MIT license. 
A copy of the license can be found in the file
 #if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
   // stay consistent with VCRT definitions
-  #define mi_decl_new(n)          mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n) 
+  #define mi_decl_new(n)          mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
   #define mi_decl_new_nothrow(n)  mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
 #else
   #define mi_decl_new(n)          mi_decl_nodiscard mi_decl_restrict
@@ -55,7 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file
   void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
   void operator delete  (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
   void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
-
+
   void* operator new (std::size_t n, std::align_val_t al)   noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
   void* operator new[](std::size_t n, std::align_val_t al)  noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
   void* operator new  (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
diff --git a/include/mimalloc-track.h b/include/mimalloc-track.h
index ed41375b..f60d7acd 100644
--- a/include/mimalloc-track.h
+++ b/include/mimalloc-track.h
@@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include 
 #define mi_track_malloc(p,size,zero)      VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
-#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) 
+#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
 #define mi_track_free(p)                  VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
 #define mi_track_free_size(p,_size)       mi_track_free(p)
 #define mi_track_mem_defined(p,size)      VALGRIND_MAKE_MEM_DEFINED(p,size)
@@ -49,13 +49,13 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_TRACK_ENABLED 0
 #define MI_TRACK_TOOL    "none"
-#define mi_track_malloc(p,size,zero)          
-#define mi_track_resize(p,oldsize,newsize)    
-#define mi_track_free(p)                      
+#define mi_track_malloc(p,size,zero)
+#define mi_track_resize(p,oldsize,newsize)
+#define mi_track_free(p)
 #define mi_track_free_size(p,_size)
-#define mi_track_mem_defined(p,size)          
-#define mi_track_mem_undefined(p,size)        
-#define mi_track_mem_noaccess(p,size)         
+#define mi_track_mem_defined(p,size)
+#define mi_track_mem_undefined(p,size)
+#define mi_track_mem_noaccess(p,size)
 #endif
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index d1cceb9b..3ffa7fa2 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifdef _MSC_VER
 #pragma warning(disable:4214)  // bitfield is not int
-#endif 
+#endif
 // Minimal alignment necessary. On most platforms 16 bytes are needed
 // due to SSE registers for example. This must be at least `sizeof(void*)`
@@ -67,7 +67,7 @@ terms of the MIT license.
A copy of the license can be found in the file // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows, modify after free, and double `free`s. #if (MI_SECURE>=3 || MI_DEBUG>=1) -#define MI_ENCODE_FREELIST 1 +#define MI_ENCODE_FREELIST 1 #endif @@ -175,8 +175,8 @@ typedef int32_t mi_ssize_t; // Used as a special value to encode block sizes in 32 bits. #define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) -// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments -#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) +// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments +#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) // ------------------------------------------------------ @@ -247,19 +247,19 @@ typedef uintptr_t mi_thread_free_t; // We don't count `freed` (as |free|) but use `used` to reduce // the number of memory accesses in the `mi_page_all_free` function(s). // -// Notes: +// Notes: // - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) // - Using `uint16_t` does not seem to slow things down // - The size is 8 words on 64-bit which helps the page index calculations -// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 +// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 // and 12 are still good for address calculation) -// - To limit the structure size, the `xblock_size` is 32-bits only; for +// - To limit the structure size, the `xblock_size` is 32-bits only; for // blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size // - `thread_free` uses the bottom bits as a delayed-free flags to optimize // concurrent frees where only the first concurrent free adds to the owning // heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). // The invariant is that no-delayed-free is only set if there is -// at least one block that will be added, or as already been added, to +// at least one block that will be added, or as already been added, to // the owning heap `thread_delayed_free` list. This guarantees that pages // will be freed correctly even if only other threads free blocks. typedef struct mi_page_s { @@ -279,7 +279,7 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - uint32_t xblock_size; // size available in each block (always `>0`) + uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) #ifdef MI_ENCODE_FREELIST @@ -288,7 +288,7 @@ typedef struct mi_page_s { _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(uintptr_t) xheap; - + struct mi_page_s* next; // next page owned by this thread with the same `block_size` struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` } mi_page_t; @@ -309,7 +309,7 @@ typedef struct mi_segment_s { // memory fields size_t memid; // id for the os-level memory manager bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. 
when allocated using large OS pages) - bool mem_is_committed; // `true` if the whole segment is eagerly committed + bool mem_is_committed; // `true` if the whole segment is eagerly committed size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) diff --git a/include/mimalloc.h b/include/mimalloc.h index 3828bc25..031f4f1f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -28,10 +28,10 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_nodiscard [[nodiscard]] #elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl #define mi_decl_nodiscard __attribute__((warn_unused_result)) -#elif defined(_HAS_NODISCARD) +#elif defined(_HAS_NODISCARD) #define mi_decl_nodiscard _NODISCARD #elif (_MSC_VER >= 1700) - #define mi_decl_nodiscard _Check_return_ + #define mi_decl_nodiscard _Check_return_ #else #define mi_decl_nodiscard #endif @@ -159,8 +159,8 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; -mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, - size_t* current_rss, size_t* peak_rss, +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, + size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; // ------------------------------------------------------------------------------------- @@ -318,16 +318,16 @@ typedef enum mi_option_e { mi_option_show_stats, // print statistics on termination mi_option_verbose, // print verbose messages // the following options are experimental (see src/options.h) - mi_option_eager_commit, - mi_option_eager_region_commit, + mi_option_eager_commit, + mi_option_eager_region_commit, mi_option_reset_decommits, mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup - mi_option_deprecated_segment_cache, - mi_option_page_reset, - mi_option_abandoned_page_reset, + mi_option_deprecated_segment_cache, + mi_option_page_reset, + mi_option_abandoned_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_reset_delay, @@ -337,7 +337,7 @@ typedef enum mi_option_e { mi_option_max_errors, mi_option_max_warnings, mi_option_max_segment_reclaim, - mi_option_destroy_on_exit, + mi_option_destroy_on_exit, _mi_option_last } mi_option_t; @@ -424,7 +424,7 @@ template struct _mi_stl_allocator_common { typedef value_type const& const_reference; typedef value_type* pointer; typedef value_type const* const_pointer; - + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; @@ -497,7 +497,7 @@ template struct _mi_heap_stl_allocator_common : public _mi_stl_allocato protected: std::shared_ptr heap; template friend struct _mi_heap_stl_allocator_common; - + _mi_heap_stl_allocator_common(bool destroy) { mi_heap_t* hp = 
mi_heap_new(); this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ @@ -526,7 +526,7 @@ template bool operator==(const mi_heap_stl_allocator& x, template bool operator!=(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (!x.is_equal(y)); } -// STL allocator allocation in a specific heap, where `free` does nothing and +// STL allocator allocation in a specific heap, where `free` does nothing and // the heap is destroyed in one go on destruction -- use with care! template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; diff --git a/readme.md b/readme.md index 64cd0c4b..40781200 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,7 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the run-time systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.7` (2022-11-03). +Latest release tag: `v2.0.7` (2022-11-03). Latest stable tag: `v1.7.7` (2022-11-03). mimalloc is a drop-in replacement for `malloc` and can be used in other programs @@ -27,7 +27,7 @@ It also has an easy way to override the default allocator in [Windows](#override to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for bounded worst-case times with reference counting). - Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS, + Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS, Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding. - __free list sharding__: instead of one big free list (per size class) we have many smaller lists per "mimalloc page" which reduces fragmentation and @@ -38,7 +38,7 @@ It also has an easy way to override the default allocator in [Windows](#override per mimalloc page, but for each page we have multiple free lists. In particular, there is one list for thread-local `free` operations, and another one for concurrent `free` operations. Free-ing from another thread can now be a single CAS without needing - sophisticated coordination between threads. Since there will be + sophisticated coordination between threads. Since there will be thousands of separate free lists, contention is naturally distributed over the heap, and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding @@ -52,19 +52,19 @@ It also has an easy way to override the default allocator in [Windows](#override heap vulnerabilities. The performance penalty is usually around 10% on average over our benchmarks. - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. - A heap can be destroyed at once instead of deallocating each object separately. + A heap can be destroyed at once instead of deallocating each object separately. - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation - times (_wcat_) (upto OS primitives), bounded space overhead (~0.2% meta-data, with low + times (_wcat_) (upto OS primitives), bounded space overhead (~0.2% meta-data, with low internal fragmentation), and has no internal points of contention using only atomic operations. 
- __fast__: In our benchmarks (see [below](#performance)), _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), - and often uses less memory. A nice property is that it does consistently well over a wide range + and often uses less memory. A nice property is that it does consistently well over a wide range of benchmarks. There is also good huge OS page support for larger server programs. The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API. -You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. +You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. -Enjoy! +Enjoy! ### Branches @@ -88,7 +88,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page * 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on Windows 11, fix compilation with musl, potentially reduced - committed memory, add `bin/minject` for Windows, + committed memory, add `bin/minject` for Windows, improved wasm support, faster aligned allocation, various small fixes. @@ -100,9 +100,9 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. * 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). - + * 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. - + * 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. @@ -116,9 +116,9 @@ Special thanks to: memory model bugs using the [genMC] model checker. * Weipeng Liu (@pongba), Zhuowei Li, Junhua Wang, and Jakub Szymanski, for their early support of mimalloc and deployment at large scale services, leading to many improvements in the mimalloc algorithms for large workloads. -* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs +* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs in (early versions of) `mimalloc`. -* Manuel Pöter (@mpoeter) and Sam Gross(@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which +* Manuel Pöter (@mpoeter) and Sam Gross(@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which uses mimalloc internally. @@ -305,8 +305,8 @@ or via environment variables: of a thread to not allocate in the huge OS pages; this prevents threads that are short lived and allocate just a little to take up space in the huge OS page area (which cannot be reset). The huge pages are usually allocated evenly among NUMA nodes. 
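
The same reservation can be requested programmatically; a sketch using the public options API (`mi_option_set` and the option names from the enum in `mimalloc.h` earlier in this patch), assuming it runs before the allocator initializes:

```c
#include <mimalloc.h>

int main(void) {
  // reserve 4 huge (1 GiB) OS pages, all on NUMA node 0; must happen
  // before the first allocation for the reservation to take effect
  mi_option_set(mi_option_reserve_huge_os_pages, 4);
  mi_option_set(mi_option_reserve_huge_os_pages_at, 0);

  void* p = mi_malloc(1024);
  mi_free(p);
  return 0;
}
```
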
- We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all - the huge pages at a specific numa node instead. + We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all + the huge pages at a specific numa node instead. Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write for all pages in the original process including the huge OS pages. When any memory is now written in that area, the @@ -343,24 +343,24 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to ## Valgrind -Generally, we recommend using the standard allocator with the amazing [Valgrind] tool (and -also for other address sanitizers). -However, it is possible to build mimalloc with Valgrind support. This has a small performance -overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final +Generally, we recommend using the standard allocator with the amazing [Valgrind] tool (and +also for other address sanitizers). +However, it is possible to build mimalloc with Valgrind support. This has a small performance +overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final executables. To build with valgrind support, use the `MI_VALGRIND=ON` cmake option: ``` > cmake ../.. -DMI_VALGRIND=ON ``` -This can also be combined with secure mode or debug mode. +This can also be combined with secure mode or debug mode. You can then run your programs directly under valgrind: ``` > valgrind ``` -If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly), +If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly), you also need to tell `valgrind` to not intercept those calls itself, and use: ``` @@ -368,8 +368,8 @@ you also need to tell `valgrind` to not intercept those calls itself, and use: ``` By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed -used and not the standard allocator. Even though the [Valgrind option][valgrind-soname] -is called `--soname-synonyms`, this also +used and not the standard allocator. Even though the [Valgrind option][valgrind-soname] +is called `--soname-synonyms`, this also works when overriding with a static library or object file. Unfortunately, it is not possible to dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`. See also the `test/test-wrong.c` file to test with `valgrind`. @@ -574,7 +574,7 @@ The _alloc-test_, by [OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of allocations in various size classes. The test is scaled such that when an allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it -means that it scales linearly. +means that it scales linearly. The _sh6bench_ and _sh8bench_ benchmarks are developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap. @@ -755,4 +755,3 @@ free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. * 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. 
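
To make the free list sharding described above concrete, here is a minimal C11 sketch (illustrative only; `page_t` and `remote_free` are hypothetical names, not the mimalloc implementation) of how a free from another thread reduces to a single CAS push:

```c
#include <stdatomic.h>

typedef struct block_s { struct block_s* next; } block_t;

// a sharded "page": the owner allocates from `free`, frees onto `local_free`,
// and all other threads push onto `thread_free` with an atomic CAS
typedef struct page_s {
  block_t*          free;
  block_t*          local_free;
  _Atomic(block_t*) thread_free;
} page_t;

static void remote_free(page_t* page, block_t* b) {
  block_t* head = atomic_load_explicit(&page->thread_free, memory_order_relaxed);
  do {
    b->next = head;  // link to the current head; retried if the CAS loses a race
  } while (!atomic_compare_exchange_weak_explicit(
             &page->thread_free, &head, b,
             memory_order_release, memory_order_relaxed));
}
```

With thousands of such pages, contention on any single `thread_free` head stays low, which is the effect the readme describes.
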
- diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 04528d9d..8de3412b 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -34,7 +34,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* size_t oversize; if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) - // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the + // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down) if mi_unlikely(offset != 0) { // todo: cannot support offset alignment for very large alignments yet @@ -46,7 +46,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block // zero afterwards as only the area from the aligned_p may be committed! - if (p == NULL) return NULL; + if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -61,9 +61,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(adjust < alignment); void* aligned_p = (void*)((uintptr_t)p + adjust); if (aligned_p != p) { - mi_page_set_has_aligned(_mi_ptr_page(p), true); + mi_page_set_has_aligned(_mi_ptr_page(p), true); } - + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); @@ -74,10 +74,10 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE; #if MI_PADDING zsize -= MI_MAX_ALIGN_SIZE; - #endif + #endif if (zsize > 0) { _mi_memzero(aligned_p, zsize); } } - + #if MI_TRACK_ENABLED if (p != aligned_p) { mi_track_free_size(p, oversize); @@ -87,7 +87,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_track_resize(aligned_p, oversize, size); } #endif - return aligned_p; + return aligned_p; } // Primitive aligned allocation @@ -109,7 +109,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t return NULL; } */ - if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) + if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); #endif @@ -306,4 +306,3 @@ mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); } - diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index ba2313a2..a2819a8b 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -19,8 +19,8 @@ terms of the MIT license. 
A copy of the license can be found in the file This is done through the malloc zone interface. It seems to be most robust in combination with interposing though or otherwise we may get zone errors as there are could - be allocations done by the time we take over the - zone. + be allocations done by the time we take over the + zone. ------------------------------------------------------ */ #include @@ -215,7 +215,7 @@ static malloc_zone_t mi_malloc_zone = { .zone_name = "mimalloc", .batch_malloc = &zone_batch_malloc, .batch_free = &zone_batch_free, - .introspect = &mi_introspect, + .introspect = &mi_introspect, #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .version = 10, @@ -242,7 +242,7 @@ static malloc_zone_t mi_malloc_zone = { #if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT) // ------------------------------------------------------ -// Override malloc_xxx and malloc_zone_xxx api's to use only +// Override malloc_xxx and malloc_zone_xxx api's to use only // our mimalloc zone. Since even the loader uses malloc // on macOS, this ensures that all allocations go through // mimalloc (as all calls are interposed). @@ -254,7 +254,7 @@ static malloc_zone_t mi_malloc_zone = { static inline malloc_zone_t* mi_get_default_zone(void) { static bool init; - if mi_unlikely(!init) { + if mi_unlikely(!init) { init = true; malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see ) } @@ -272,7 +272,7 @@ static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) { return mi_get_default_zone(); } -static malloc_zone_t* mi_malloc_default_zone (void) { +static malloc_zone_t* mi_malloc_default_zone (void) { return mi_get_default_zone(); } @@ -292,11 +292,11 @@ static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, v return KERN_SUCCESS; } -static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { +static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { return (zone == NULL ? 
mi_malloc_zone.zone_name : zone->zone_name); } -static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { +static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { MI_UNUSED(zone); MI_UNUSED(name); } @@ -306,7 +306,7 @@ static int mi_malloc_jumpstart(uintptr_t cookie) { } static void mi__malloc_fork_prepare(void) { - // nothing + // nothing } static void mi__malloc_fork_parent(void) { // nothing @@ -367,13 +367,13 @@ __attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] MI_INTERPOSE_MI(malloc_destroy_zone), MI_INTERPOSE_MI(malloc_get_all_zones), MI_INTERPOSE_MI(malloc_get_zone_name), - MI_INTERPOSE_MI(malloc_jumpstart), + MI_INTERPOSE_MI(malloc_jumpstart), MI_INTERPOSE_MI(malloc_printf), MI_INTERPOSE_MI(malloc_set_zone_name), MI_INTERPOSE_MI(_malloc_fork_child), MI_INTERPOSE_MI(_malloc_fork_parent), MI_INTERPOSE_MI(_malloc_fork_prepare), - + MI_INTERPOSE_ZONE(zone_batch_free), MI_INTERPOSE_ZONE(zone_batch_malloc), MI_INTERPOSE_ZONE(zone_calloc), @@ -416,7 +416,7 @@ static inline malloc_zone_t* mi_get_default_zone(void) } #if defined(__clang__) -__attribute__((constructor(0))) +__attribute__((constructor(0))) #else __attribute__((constructor)) // seems not supported by g++-11 on the M1 #endif diff --git a/src/alloc-override.c b/src/alloc-override.c index 70cf3367..ca508aa6 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file #error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)" #endif -#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) +#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) #if defined(__APPLE__) #include @@ -43,8 +43,8 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; #define MI_FORWARD0(fun,x) MI_FORWARD(fun) #define MI_FORWARD02(fun,x,y) MI_FORWARD(fun) #else - // otherwise use forwarding by calling our `mi_` function - #define MI_FORWARD1(fun,x) { return fun(x); } + // otherwise use forwarding by calling our `mi_` function + #define MI_FORWARD1(fun,x) { return fun(x); } #define MI_FORWARD2(fun,x,y) { return fun(x,y); } #define MI_FORWARD3(fun,x,y,z) { return fun(x,y,z); } #define MI_FORWARD0(fun,x) { fun(x); } @@ -52,8 +52,8 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; #endif -#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE) - // define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for +#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE) + // define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for // functions that are interposed (or the interposing does not work) #define MI_OSX_IS_INTERPOSED @@ -70,7 +70,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; }; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) - + __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_MI(malloc), @@ -84,7 +84,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; MI_INTERPOSE_MI(valloc), MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_MI(malloc_good_size), - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED 
>= MAC_OS_X_VERSION_10_15 MI_INTERPOSE_MI(aligned_alloc), #endif #ifdef MI_OSX_ZONE @@ -128,11 +128,11 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; // cannot override malloc unless using a dll. // we just override new/delete which does work in a static library. #else - // On all other systems forward to our API + // On all other systems forward to our API mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size) mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) - mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) + mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) @@ -174,20 +174,20 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast(al)); } - + void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast(al)); } void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast(al)); } #endif -#elif (defined(__GNUC__) || defined(__clang__)) +#elif (defined(__GNUC__) || defined(__clang__)) // ------------------------------------------------------ // Override by defining the mangled C++ names of the operators (as // used by GCC and CLang). 
// See // ------------------------------------------------------ - + void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[] void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) @@ -196,12 +196,12 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } - + #if (MI_INTPTR_SIZE==8) void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } - void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } @@ -210,7 +210,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } - void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } @@ -240,22 +240,22 @@ extern "C" { // No forwarding here due to aliasing/name mangling issues void* valloc(size_t size) { return mi_valloc(size); } - void vfree(void* p) { mi_free(p); } + void vfree(void* p) { mi_free(p); } size_t malloc_good_size(size_t size) { return mi_malloc_good_size(size); } int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } - + // `aligned_alloc` is only available when __USE_ISOC11 is defined. // Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9. // Fortunately, in the case where `aligned_alloc` is declared as `static inline` it // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves. 
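
For context, once these overrides are linked in, ordinary libc-style calls are serviced by mimalloc with no source changes; a hypothetical example program (not part of the patch):

```c
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  void* p = NULL;
  // resolved to mi_posix_memalign by the override above
  if (posix_memalign(&p, 64, 1000) != 0) return 1;
  printf("64-byte aligned block at %p\n", p);
  free(p);  // resolved to mi_free
  return 0;
}
```
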
- #if __USE_ISOC11 + #if __USE_ISOC11 void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } #endif #endif // no forwarding here due to aliasing/name mangling issues -void cfree(void* p) { mi_free(p); } +void cfree(void* p) { mi_free(p); } void* pvalloc(size_t size) { return mi_pvalloc(size); } void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); } diff --git a/src/alloc-posix.c b/src/alloc-posix.c index e73628f4..e6505f29 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -91,7 +91,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size #endif return NULL; } - */ + */ // C11 also requires alignment to be a power-of-two (and > 0) which is checked in mi_malloc_aligned void* p = mi_malloc_aligned(size, alignment); mi_assert_internal(((uintptr_t)p % alignment) == 0); @@ -110,7 +110,7 @@ mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_att errno = EINVAL; return EINVAL; } - void** op = (void**)p; + void** op = (void**)p; void* newp = mi_reallocarray(*op, count, size); if mi_unlikely(newp == NULL) { return errno; } *op = newp; diff --git a/src/alloc.c b/src/alloc.c index 01a0d4a4..6e468c85 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -30,7 +30,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* const block = page->free; if mi_unlikely(block == NULL) { - return _mi_malloc_generic(heap, size, zero, 0); + return _mi_malloc_generic(heap, size, zero, 0); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list @@ -38,21 +38,21 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz page->free = mi_block_next(page, block); mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); - // allow use of the block internally + // allow use of the block internally // note: when tracking we need to avoid ever touching the MI_PADDING since // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc-track.h`) mi_track_mem_undefined(block, mi_page_usable_block_size(page)); - + // zero the block? note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) const size_t zsize = (page->is_zero ? 
sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size); - _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); + _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED - if (!page->is_zero && !zero && !mi_page_is_huge(page)) { - memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); + if (!page->is_zero && !zero && !mi_page_is_huge(page)) { + memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } #elif (MI_SECURE!=0) if (!zero) { block->next = 0; } // don't leak internal data @@ -91,7 +91,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { mi_assert(heap != NULL); - #if MI_DEBUG + #if MI_DEBUG const uintptr_t tid = _mi_thread_id(); mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local #endif @@ -232,9 +232,9 @@ static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* bloc mi_track_mem_defined(padding,sizeof(mi_padding_t)); *delta = padding->delta; uint32_t canary = padding->canary; - uintptr_t keys[2]; + uintptr_t keys[2]; keys[0] = page->keys[0]; - keys[1] = page->keys[1]; + keys[1] = page->keys[1]; bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); return ok; @@ -245,7 +245,7 @@ static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* bl size_t bsize; size_t delta; bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); mi_assert_internal(delta <= bsize); + mi_assert_internal(ok); mi_assert_internal(delta <= bsize); return (ok ? bsize - delta : 0); } @@ -319,15 +319,15 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) +#if (MI_STAT < 2) MI_UNUSED(block); #endif mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); + const size_t bsize = mi_page_usable_block_size(page); #if (MI_STAT>1) const size_t usize = mi_page_usable_size_of(page, block); mi_heap_stat_decrease(heap, malloc, usize); -#endif +#endif if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, normal, bsize); #if (MI_STAT > 1) @@ -352,7 +352,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { } #endif -#if MI_HUGE_PAGE_ABANDON +#if MI_HUGE_PAGE_ABANDON #if (MI_STAT>0) // maintain stats for huge objects static void mi_stat_huge_free(const mi_page_t* page) { @@ -383,7 +383,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // that is safe as these are constant and the page won't be freed (as the block is not freed yet). 
mi_check_padding(page, block); mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - + mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind == MI_PAGE_HUGE) { #if MI_HUGE_PAGE_ABANDON @@ -392,13 +392,13 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc _mi_segment_huge_page_free(segment, page, block); return; #else - // huge pages are special as they occupy the entire segment + // huge pages are special as they occupy the entire segment // as these are large we reset the memory occupied by the page so it is available to other threads // (as the owning thread needs to actually free the memory later). _mi_segment_huge_page_reset(segment, page, block); #endif } - + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading memset(block, MI_DEBUG_FREED, mi_usable_size(block)); @@ -484,13 +484,13 @@ void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* p mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); mi_stat_free(page, block); // stat_free may access the padding mi_track_free(p); - _mi_free_block(page, is_local, block); + _mi_free_block(page, is_local, block); } // Get the segment data belonging to a pointer // This is just a single `and` in assembly but does further checks in debug mode // (and secure mode) if this was a valid pointer. -static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) +static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) { MI_UNUSED(msg); mi_assert(p != NULL); @@ -524,7 +524,7 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms return segment; } -// Free a block +// Free a block // fast path written carefully to prevent spilling on the stack void mi_free(void* p) mi_attr_noexcept { @@ -532,10 +532,10 @@ void mi_free(void* p) mi_attr_noexcept mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); const bool is_local= (_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); mi_page_t* const page = _mi_segment_page_of(segment, p); - + if mi_likely(is_local) { // thread-local free? 
if mi_likely(page->flags.full_aligned == 0) // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) - { + { mi_block_t* const block = (mi_block_t*)p; if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); @@ -546,7 +546,7 @@ void mi_free(void* p) mi_attr_noexcept mi_track_free(p); mi_block_set_next(page, block, page->local_free); page->local_free = block; - if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) + if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) _mi_page_retire(page); } } @@ -558,7 +558,7 @@ void mi_free(void* p) mi_attr_noexcept else { // not thread-local; use generic path _mi_free_generic(segment, page, false, p); - } + } } // return true if successful @@ -598,7 +598,7 @@ mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { if (p == NULL) return 0; const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); - const mi_page_t* const page = _mi_segment_page_of(segment, p); + const mi_page_t* const page = _mi_segment_page_of(segment, p); if mi_likely(!mi_page_has_aligned(page)) { const mi_block_t* block = (const mi_block_t*)p; return mi_page_usable_size_of(page, block); @@ -679,7 +679,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) m // Expand (or shrink) in place (or fail) void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { #if MI_PADDING - // we do not shrink/expand with padding enabled + // we do not shrink/expand with padding enabled MI_UNUSED(p); MI_UNUSED(newsize); return NULL; #else @@ -721,7 +721,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) } mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { - return _mi_heap_realloc_zero(heap, p, newsize, false); + return _mi_heap_realloc_zero(heap, p, newsize, false); } mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { @@ -881,9 +881,9 @@ static bool mi_try_new_handler(bool nothrow) { #else std::new_handler h = std::set_new_handler(); std::set_new_handler(h); - #endif + #endif if (h==NULL) { - _mi_error_message(ENOMEM, "out of memory in 'new'"); + _mi_error_message(ENOMEM, "out of memory in 'new'"); if (!nothrow) { throw std::bad_alloc(); } @@ -914,7 +914,7 @@ static std_new_handler_t mi_get_new_handler() { static bool mi_try_new_handler(bool nothrow) { std_new_handler_t h = mi_get_new_handler(); if (h==NULL) { - _mi_error_message(ENOMEM, "out of memory in 'new'"); + _mi_error_message(ENOMEM, "out of memory in 'new'"); if (!nothrow) { abort(); // cannot throw in plain C, use abort } diff --git a/src/arena.c b/src/arena.c index 0cc569ab..57f48a73 100644 --- a/src/arena.c +++ b/src/arena.c @@ -18,7 +18,7 @@ which is sometimes needed for embedded devices or shared memory for example. (We can also employ this with WASI or `sbrk` systems to reserve large arenas on demand and be able to reuse them efficiently). -The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. +The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -95,7 +95,7 @@ mi_arena_id_t _mi_arena_id_none(void) { } static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { - return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || + return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || (arena_id == req_arena_id)); } @@ -152,7 +152,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, + bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { MI_UNUSED(arena_index); @@ -269,7 +269,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset return NULL; } *is_zero = true; - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); if (p != NULL) { *is_pinned = *large; } return p; @@ -333,7 +333,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, // todo: use reset instead of decommit on windows? _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); } - // and make it available to others again + // and make it available to others again bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!all_inuse) { _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); @@ -372,8 +372,8 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is mi_assert_internal(is_committed); is_committed = true; } - - const size_t bcount = size / MI_ARENA_BLOCK_SIZE; + + const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); const size_t bitmaps = (is_committed ? 2 : 3); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); @@ -411,7 +411,7 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is } // Reserve a range of regular OS memory -int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block diff --git a/src/bitmap.c b/src/bitmap.c index 88c8620c..9ba994d7 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -169,15 +169,15 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // between the fields. This is used in arena allocation //-------------------------------------------------------------------------- -// Try to atomically claim a sequence of `count` bits starting from the field +// Try to atomically claim a sequence of `count` bits starting from the field // at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. 
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); - + // check initial trailing zeros mi_bitmap_field_t* field = &bitmap[idx]; - size_t map = mi_atomic_load_relaxed(field); + size_t map = mi_atomic_load_relaxed(field); const size_t initial = mi_clz(map); // count of initial zeros starting at idx mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; @@ -212,14 +212,14 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit newmap = map | initial_mask; if ((map & initial_mask) != 0) { goto rollback; }; } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - + // intermediate fields while (++field < final_field) { newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); map = 0; if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } } - + // final field mi_assert_internal(field == final_field); map = mi_atomic_load_relaxed(field); @@ -232,7 +232,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); return true; -rollback: +rollback: // roll back intermediate fields while (--field > initial_field) { newmap = 0; @@ -246,7 +246,7 @@ rollback: mi_assert_internal((map & initial_mask) == initial_mask); newmap = map & ~initial_mask; } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - } + } // retry? (we make a recursive call instead of goto to be able to use const declarations) if (retries < 4) { return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); @@ -311,7 +311,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t size_t pre_mask; size_t mid_mask; size_t post_mask; - size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_one = true; mi_bitmap_field_t* field = &bitmap[idx]; size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); @@ -324,7 +324,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t prev = mi_atomic_and_acq_rel(field, ~post_mask); if ((prev & post_mask) != post_mask) all_one = false; } - return all_one; + return all_one; } // Set `count` bits at `bitmap_idx` to 1 atomically @@ -356,7 +356,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co } -// Returns `true` if all `count` bits were 1. +// Returns `true` if all `count` bits were 1. // `any_ones` is `true` if there was at least one bit set to one. 
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { size_t idx = mi_bitmap_index_field(bitmap_idx); @@ -379,7 +379,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field prev = mi_atomic_load_relaxed(field); if ((prev & post_mask) != post_mask) all_ones = false; if ((prev & post_mask) != 0) any_ones = true; - } + } if (pany_ones != NULL) *pany_ones = any_ones; return all_ones; } diff --git a/src/heap.c b/src/heap.c index 3c73d935..0ed0ab2c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -92,7 +92,7 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= MI_FORCE); if (mi_page_all_free(page)) { - // no more used blocks, free the page. + // no more used blocks, free the page. // note: this will free retired pages as well. _mi_page_free(page, pq, collect >= MI_FORCE); } @@ -117,7 +117,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) if (heap==NULL || !mi_heap_is_initialized(heap)) return; _mi_deferred_free(heap, collect >= MI_FORCE); - // note: never reclaim on collect but leave it to threads that need storage to reclaim + // note: never reclaim on collect but leave it to threads that need storage to reclaim if ( #ifdef NDEBUG collect == MI_FORCE @@ -130,7 +130,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // if all memory is freed by now, all segments should be freed. _mi_abandoned_reclaim_all(heap, &heap->tld->segments); } - + // if abandoning, mark all pages to no longer add to delayed_free if (collect == MI_ABANDON) { mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); @@ -240,7 +240,7 @@ static void mi_heap_free(mi_heap_t* heap) { // remove ourselves from the thread local heaps list // linear search but we expect the number of heaps to be relatively small mi_heap_t* prev = NULL; - mi_heap_t* curr = heap->tld->heaps; + mi_heap_t* curr = heap->tld->heaps; while (curr != heap && curr != NULL) { prev = curr; curr = curr->next; } @@ -353,8 +353,8 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { // reduce the size of the delayed frees _mi_heap_delayed_free_partial(from); - - // transfer all pages by appending the queues; this will set a new heap field + + // transfer all pages by appending the queues; this will set a new heap field // so threads may do delayed frees in either heap for a while. // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state // so after this only the new heap will get delayed frees @@ -367,17 +367,17 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { } mi_assert_internal(from->page_count == 0); - // and do outstanding delayed frees in the `from` heap + // and do outstanding delayed frees in the `from` heap // note: be careful here as the `heap` field in all those pages no longer points to `from`, - // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls + // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls // the regular `_mi_free_delayed_block` which is safe.
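The delayed-free drain used around heap absorption (`_mi_heap_delayed_free_partial` above, `_mi_heap_delayed_free_all` just below) operates on a multi-producer list of blocks that other threads freed into this heap. A hedged sketch of the underlying idiom, with `block_t` as a simplified stand-in for mi_block_t: one atomic exchange detaches the entire list, after which it can be walked without further synchronization while concurrent producers push onto a fresh, empty list:

#include <stdatomic.h>
#include <stddef.h>

typedef struct block_s { struct block_s* next; } block_t;   // stand-in for mi_block_t

// Detach the whole delayed-free list in one atomic step and process it.
static void drain_delayed_free(_Atomic(block_t*)* delayed, void (*free_one)(block_t*)) {
  block_t* cur = atomic_exchange_explicit(delayed, NULL, memory_order_acq_rel);
  while (cur != NULL) {
    block_t* next = cur->next;   // read the link before freeing the node
    free_one(cur);
    cur = next;
  }
}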
- _mi_heap_delayed_free_all(from); + _mi_heap_delayed_free_all(from); #if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353 mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL); #endif // and reset the `from` heap - mi_heap_reset_pages(from); + mi_heap_reset_pages(from); } // Safe delete a heap without freeing any still allocated blocks in that heap. diff --git a/src/init.c b/src/init.c index 66c7e74b..b5a98403 100644 --- a/src/init.c +++ b/src/init.c @@ -173,7 +173,7 @@ typedef struct mi_thread_data_s { // Thread meta-data is allocated directly from the OS. For // some programs that do not use thread pools and allocate and -// destroy many OS threads, this may cause too much overhead +// destroy many OS threads, this may cause too much overhead // per thread so we maintain a small cache of recently freed metadata. #define TD_CACHE_SIZE (8) @@ -185,7 +185,7 @@ static mi_thread_data_t* mi_thread_data_alloc(void) { for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { - td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); + td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { return td; } @@ -262,7 +262,7 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = &tld->stats; - _mi_heap_set_default_direct(heap); + _mi_heap_set_default_direct(heap); } return false; } @@ -295,9 +295,9 @@ static bool _mi_heap_done(mi_heap_t* heap) { if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); } - + // merge stats - _mi_stats_done(&heap->tld->stats); + _mi_stats_done(&heap->tld->stats); // free if not the main thread if (heap != &_mi_heap_main) { @@ -305,8 +305,8 @@ static bool _mi_heap_done(mi_heap_t* heap) { mi_thread_data_free((mi_thread_data_t*)heap); } else { - mi_thread_data_collect(); // free cached thread metadata - #if 0 + mi_thread_data_collect(); // free cached thread metadata + #if 0 // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, // there may still be delete/free calls after the mi_fls_done is called. Issue #207 _mi_heap_destroy_pages(heap); @@ -342,7 +342,7 @@ static void _mi_thread_done(mi_heap_t* default_heap); // use thread local storage keys to detect thread ending #include #include - #if (_WIN32_WINNT < 0x600) // before Windows Vista WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); @@ -397,7 +397,7 @@ void mi_thread_init(void) mi_attr_noexcept { // ensure our process has started already mi_process_init(); - + // initialize the thread local default heap // (this will call `_mi_heap_set_default_direct` and thus set the // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) @@ -418,7 +418,7 @@ static void _mi_thread_done(mi_heap_t* heap) { // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
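The `td_cache` lookup above illustrates a common lock-free slot-cache pattern: the relaxed load is only a cheap filter, and the atomic exchange is what actually hands out an entry, so two threads can never pop the same slot. A sketch of both directions, written with standard C11 atomics instead of mimalloc's atomic macros:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

#define TD_CACHE_SIZE 8
static _Atomic(void*) td_cache[TD_CACHE_SIZE];

// Pop any cached entry, or NULL if the cache is empty
// (the caller then allocates fresh metadata from the OS).
static void* td_cache_pop(void) {
  for (int i = 0; i < TD_CACHE_SIZE; i++) {
    if (atomic_load_explicit(&td_cache[i], memory_order_relaxed) != NULL) {
      void* td = atomic_exchange_explicit(&td_cache[i], NULL, memory_order_acq_rel);
      if (td != NULL) return td;   // we won the race for this slot
    }
  }
  return NULL;
}

// Push into the first free slot; on a full cache the caller frees to the OS.
static bool td_cache_push(void* td) {
  for (int i = 0; i < TD_CACHE_SIZE; i++) {
    void* expect = NULL;
    if (atomic_compare_exchange_strong_explicit(&td_cache[i], &expect, td,
                                                memory_order_acq_rel, memory_order_relaxed)) {
      return true;
    }
  }
  return false;
}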
if (heap->thread_id != _mi_thread_id()) return; - + // abandon the thread local heap if (_mi_heap_done(heap)) return; // returns true if already ran } @@ -509,11 +509,11 @@ static void mi_process_load(void) { os_preloading = false; mi_assert_internal(_mi_is_main_thread()); #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) - atexit(&mi_process_done); + atexit(&mi_process_done); #endif _mi_options_init(); mi_process_setup_auto_thread_done(); - mi_process_init(); + mi_process_init(); if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); // show message from the redirector (if present) @@ -578,7 +578,7 @@ void mi_process_init(void) mi_attr_noexcept { } else { mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); } - } + } if (mi_option_is_enabled(mi_option_reserve_os_memory)) { long ksize = mi_option_get(mi_option_reserve_os_memory); if (ksize > 0) { @@ -601,7 +601,7 @@ static void mi_cdecl mi_process_done(void) { #endif #ifndef MI_SKIP_COLLECT_ON_EXIT - #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) + #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) // free all memory if possible on process exit. This is not needed for a stand-alone process // but should be done if mimalloc is statically linked into another shared library which // is repeatedly loaded/unloaded, see issue #281. @@ -620,7 +620,7 @@ static void mi_cdecl mi_process_done(void) { if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { mi_stats_print(NULL); } - mi_allocator_done(); + mi_allocator_done(); _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); os_preloading = true; // don't call the C runtime anymore } @@ -642,7 +642,7 @@ static void mi_cdecl mi_process_done(void) { if (!mi_is_redirected()) { mi_thread_done(); } - } + } return TRUE; } diff --git a/src/options.c b/src/options.c index 4add09e0..eead6450 100644 --- a/src/options.c +++ b/src/options.c @@ -106,7 +106,7 @@ void _mi_options_init(void) { for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; long l = mi_option_get(option); MI_UNUSED(l); // initialize - // if (option != mi_option_verbose) + // if (option != mi_option_verbose) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); @@ -177,7 +177,7 @@ static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. - if (!_mi_preloading()) { + if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used as it aborts if it fails to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; if (hcon == INVALID_HANDLE_VALUE) { @@ -281,7 +281,7 @@ static _Atomic(size_t) warning_count; // = 0; // when >= max_warning_count stop // inside the C runtime causes another message. // In some cases (like on macOS) the loader already allocates which // calls into mimalloc; if we then access thread locals (like `recurse`) // this may crash as the access may call _tlv_bootstrap that tries to - // (recursively) invoke malloc again to allocate space for the thread local + // (recursively) invoke malloc again to allocate space for the thread local // variables on demand. This is why we use a _mi_preloading test on such // platforms.
However, the C code generator may move the initial thread local address @@ -407,7 +407,7 @@ static _Atomic(void*) mi_error_arg; // = NULL static void mi_error_default(int err) { MI_UNUSED(err); -#if (MI_DEBUG>0) +#if (MI_DEBUG>0) if (err==EFAULT) { #ifdef _MSC_VER __debugbreak(); #endif @@ -500,23 +500,23 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { return (len > 0 && len < result_size); } #elif !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) -// On Posix systems use `environ` to access environment variables +// On Posix systems use `environ` to access environment variables // even before the C runtime is initialized. #if defined(__APPLE__) && defined(__has_include) && __has_include() #include static char** mi_get_environ(void) { return (*_NSGetEnviron()); } -#else +#else extern char** environ; static char** mi_get_environ(void) { return environ; } #endif static bool mi_getenv(const char* name, char* result, size_t result_size) { - if (name==NULL) return false; + if (name==NULL) return false; const size_t len = strlen(name); - if (len == 0) return false; + if (len == 0) return false; char** env = mi_get_environ(); if (env == NULL) return false; // compare up to 256 entries @@ -530,7 +530,7 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { } return false; } -#else +#else // fallback: use standard C `getenv` but this cannot be used while initializing the C runtime static bool mi_getenv(const char* name, char* result, size_t result_size) { // cannot call getenv() when still initializing the C runtime. @@ -558,7 +558,7 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { #endif // !MI_USE_ENVIRON #endif // !MI_NO_GETENV -static void mi_option_init(mi_option_desc_t* desc) { +static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); diff --git a/src/os.c b/src/os.c index 6631f65f..6a45c6b9 100644 --- a/src/os.c +++ b/src/os.c @@ -99,7 +99,7 @@ static size_t os_alloc_granularity = 4096; // if non-zero, use large page allocation static size_t large_os_page_size = 0; -// is memory overcommit allowed? +// is memory overcommit allowed? // set dynamically in _mi_os_init (and if true we use MAP_NORESERVE) static bool os_overcommit = true; @@ -150,7 +150,7 @@ typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { MiMemExtendedParameterUserPhysicalHandle, MiMemExtendedParameterAttributeFlags, MiMemExtendedParameterMax -} MI_MEM_EXTENDED_PARAMETER_TYPE; +} MI_MEM_EXTENDED_PARAMETER_TYPE; typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; @@ -216,7 +216,7 @@ static bool mi_win_enable_large_os_pages(void) return (ok!=0); } -void _mi_os_init(void) +void _mi_os_init(void) { os_overcommit = false; // get the page size @@ -277,9 +277,9 @@ static void os_detect_overcommit(void) { size_t olen = sizeof(val); if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { os_overcommit = (val != 0); - } + } #else - // default: overcommit is true + // default: overcommit is true #endif } @@ -317,10 +317,10 @@ static int mi_madvise(void* addr, size_t length, int advice) { static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; // Return a MI_SEGMENT_SIZE aligned address that is probably available.
-// If this returns NULL, the OS will determine the address but on some OS's that may not be +// If this returns NULL, the OS will determine the address but on some OS's that may not be // properly aligned which can be more costly as it needs to be adjusted afterwards. -// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; -// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses +// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; +// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses // in the middle of the 2TiB - 6TiB address range (see issue #372)) #define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start @@ -394,12 +394,12 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats #endif if (was_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); - return !err; + return !err; } /* ----------------------------------------------------------- - Raw allocation on Windows (VirtualAlloc) + Raw allocation on Windows (VirtualAlloc) -------------------------------------------------------------- */ #ifdef _WIN32 @@ -414,7 +414,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); // fall through on error } - } + } #endif // on modern Windows try use VirtualAlloc2 for aligned allocation if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { @@ -472,12 +472,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, -------------------------------------------------------------- */ #elif defined(MI_USE_SBRK) || defined(__wasi__) -#if defined(MI_USE_SBRK) +#if defined(MI_USE_SBRK) static void* mi_memory_grow( size_t size ) { void* p = sbrk(size); if (p == (void*)(-1)) return NULL; #if !defined(__wasi__) // on wasi this is always zero initialized already (?) - memset(p,0,size); + memset(p,0,size); #endif return p; } @@ -485,8 +485,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static void* mi_memory_grow( size_t size ) { size_t base = (size > 0 ? 
__builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) : __builtin_wasm_memory_size(0)); - if (base == SIZE_MAX) return NULL; - return (void*)(base * _mi_os_page_size()); + if (base == SIZE_MAX) return NULL; + return (void*)(base * _mi_os_page_size()); } #endif @@ -498,7 +498,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) { void* p = NULL; if (try_alignment <= 1) { // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) - #if defined(MI_USE_PTHREADS) + #if defined(MI_USE_PTHREADS) pthread_mutex_lock(&mi_heap_grow_mutex); #endif p = mi_memory_grow(size); @@ -520,7 +520,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) { if (current != NULL) { void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); - base = mi_memory_grow(alloc_size); + base = mi_memory_grow(alloc_size); } } #if defined(MI_USE_PTHREADS) @@ -537,7 +537,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) { } } if (p == NULL) { - _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); + _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); errno = ENOMEM; return NULL; } @@ -548,10 +548,10 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) { /* ----------------------------------------------------------- Raw allocation on Unix's (mmap) -------------------------------------------------------------- */ -#else +#else #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { - MI_UNUSED(try_alignment); + MI_UNUSED(try_alignment); #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); @@ -582,7 +582,7 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr #endif // regular mmap void* p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; + if (p!=MAP_FAILED) return p; // failed to allocate return NULL; } @@ -599,7 +599,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro int fd = -1; if (_mi_os_has_overcommit()) { flags |= MAP_NORESERVE; - } + } #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD #endif @@ -688,7 +688,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { *is_large = true; } - } + } #endif } } @@ -756,7 +756,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // try first with a hint (this will be aligned directly on Win 10+ or BSD) void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); if (p == NULL) return NULL; - + // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { mi_os_mem_free(p, size, commit, stats); @@ -768,7 +768,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // over-allocate uncommitted (virtual) memory p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? 
*/, false /* allow_large */, is_large, stats); if (p == NULL) return NULL; - + // set p to the aligned part in the full region // note: this is dangerous on Windows as VirtualFree needs the actual region pointer // but in mi_os_mem_free we handle this (hopefully exceptional) situation. @@ -844,7 +844,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar OS aligned allocation with an offset. This is used for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc page where the object can be aligned at an offset from the start of the segment. - As we may need to overallocate, we need to free such pointers using `mi_free_aligned` + As we may need to overallocate, we need to free such pointers using `mi_free_aligned` to use the actual start of the memory region. ----------------------------------------------------------- */ @@ -877,7 +877,7 @@ void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_of mi_assert(align_offset <= MI_SEGMENT_SIZE); const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; void* start = (uint8_t*)p - extra; - _mi_os_free_ex(start, size + extra, was_committed, tld_stats); + _mi_os_free_ex(start, size + extra, was_committed, tld_stats); } /* ----------------------------------------------------------- @@ -957,7 +957,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ // commit: just change the protection err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } - } + } else { // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) const int fd = mi_unix_mmap_fd(); @@ -967,10 +967,10 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #else // Linux, macOSX and others. 
if (commit) { - // commit: ensure we can access the area + // commit: ensure we can access the area err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } - } + } else { #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) @@ -1007,7 +1007,7 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); } @@ -1046,7 +1046,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; - if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); err = mi_madvise(start, csize, MADV_DONTNEED); @@ -1079,7 +1079,7 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; *is_zero = false; - return mi_os_resetx(addr, size, false, stats); + return mi_os_resetx(addr, size, false, stats); } @@ -1188,7 +1188,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].Arg.ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - + // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -1337,7 +1337,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -#ifdef _WIN32 +#ifdef _WIN32 static size_t mi_os_numa_nodex(void) { USHORT numa_node = 0; if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { @@ -1353,7 +1353,7 @@ static size_t mi_os_numa_nodex(void) { DWORD pnum = GetCurrentProcessorNumber(); UCHAR nnode = 0; BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) numa_node = nnode; + if (ok) numa_node = nnode; } return numa_node; } @@ -1457,7 +1457,7 @@ size_t _mi_os_numa_node_count_get(void) { else { count = mi_os_numa_node_countx(); // or detect dynamically if (count == 0) count = 1; - } + } mi_atomic_store_release(&_mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } diff --git a/src/page-queue.c b/src/page-queue.c index 61cd1310..fbfe2811 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -76,7 +76,7 @@ static inline uint8_t mi_bin(size_t size) { bin = MI_BIN_HUGE; } else { - #if defined(MI_ALIGN4W) + #if defined(MI_ALIGN4W) if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes #endif wsize--; @@ -303,7 +303,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue for (mi_page_t* page = append->first; page != NULL; page = page->next) { // inline `mi_page_set_heap` to avoid wrong assertion 
during absorption; // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. - mi_atomic_store_release(&page->xheap, (uintptr_t)heap); + mi_atomic_store_release(&page->xheap, (uintptr_t)heap); // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a // side effect that it spins until any DELAYED_FREEING is finished. This ensures // that after appending only the new heap will be used for delayed free operations. diff --git a/src/page.c b/src/page.c index 8a24c13f..91dd0c06 100644 --- a/src/page.c +++ b/src/page.c @@ -112,7 +112,7 @@ bool _mi_page_is_valid(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); #if MI_HUGE_PAGE_ABANDON - if (segment->page_kind != MI_PAGE_HUGE) + if (segment->page_kind != MI_PAGE_HUGE) #endif { mi_page_queue_t* pq = mi_page_queue_of(page); @@ -134,7 +134,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfreex; mi_delayed_t old_delay; - mi_thread_free_t tfree; + mi_thread_free_t tfree; size_t yield_count = 0; do { tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; @@ -262,7 +262,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_assert_internal(pq != NULL); mi_assert_internal(mi_heap_contains_queue(heap, pq)); mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size); - #endif + #endif mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) @@ -413,7 +413,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } -#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX +#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX #define MI_RETIRE_CYCLES (8) // Retire a page with no more used blocks @@ -603,7 +603,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) if (page->capacity >= page->reserved) return; size_t page_size; - //uint8_t* page_start = + //uint8_t* page_start = _mi_page_start(_mi_page_segment(page), page, &page_size); mi_stat_counter_increase(tld->stats.pages_extended, 1); @@ -615,7 +615,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; } mi_assert_internal(max_extend > 0); - + if (extend > max_extend) { // ensure we don't touch memory beyond the page to reduce page commit. // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%. 
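A sketch of the extension sizing that `mi_page_extend_free` performs above; `MIN_EXTEND` and `MAX_EXTEND_SIZE` are hypothetical stand-ins for mimalloc's `MI_MIN_EXTEND`/`MI_MAX_EXTEND_SIZE` tuning constants:

#include <stddef.h>

#define MIN_EXTEND       1          // always carve out at least one block
#define MAX_EXTEND_SIZE  (4*1024)   // cap on freshly touched bytes per extension

// Number of new blocks to make available in one extension step: enough to
// amortize the slow path, but bounded so we do not commit and touch memory
// far beyond what the application actually uses.
static size_t extend_count(size_t block_size, size_t reserved, size_t capacity) {
  size_t extend = reserved - capacity;    // blocks still unallocated in the page
  size_t max_extend = (block_size >= MAX_EXTEND_SIZE ? MIN_EXTEND
                                                     : MAX_EXTEND_SIZE / block_size);
  if (max_extend < MIN_EXTEND) max_extend = MIN_EXTEND;
  if (extend > max_extend) extend = max_extend;
  return extend;
}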
@@ -734,7 +734,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p page = mi_page_fresh(heap, pq); if (page == NULL && first_try) { // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again - page = mi_page_queue_find_free_ex(heap, pq, false); + page = mi_page_queue_find_free_ex(heap, pq, false); } } else { @@ -752,17 +752,17 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if (page != NULL) { - #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness + #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); } - else + else #endif { _mi_page_free_collect(page,false); } - + if (mi_page_immediate_available(page)) { page->retire_expire = 0; return page; // fast path @@ -825,7 +825,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue mi_page_set_heap(page, NULL); - #endif + #endif if (bsize > MI_HUGE_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, giant, bsize); @@ -844,7 +844,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept { // huge allocation? - const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); @@ -863,7 +863,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for +// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for // very large requested alignments in which case we use a huge segment. 
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { @@ -891,7 +891,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al } if mi_unlikely(page == NULL) { // out of memory - const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size); return NULL; } diff --git a/src/random.c b/src/random.c index 60d64ef7..bf7e5b83 100644 --- a/src/random.c +++ b/src/random.c @@ -169,8 +169,8 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim #if defined(_WIN32) #if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) -// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using -// dynamic overriding, we observed it can raise an exception when compiled with C++, and +// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using +// dynamic overriding, we observed it can raise an exception when compiled with C++, and // sometimes deadlocks when also running under the VS debugger. // In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. // To be continued.. @@ -203,7 +203,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { static bool os_random_buf(void* buf, size_t buf_len) { #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 // We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf - // may fail silently on macOS. See PR #390, and + // may fail silently on macOS. See PR #390, and return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); #else // fall back on older macOS @@ -281,7 +281,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random - + #if defined(_WIN32) LARGE_INTEGER pcount; QueryPerformanceCounter(&pcount); @@ -325,7 +325,7 @@ static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { } void _mi_random_init(mi_random_ctx_t* ctx) { - mi_random_init_ex(ctx, false); + mi_random_init_ex(ctx, false); } void _mi_random_init_weak(mi_random_ctx_t * ctx) { diff --git a/src/region.c b/src/region.c index 7bcc951a..3571abb6 100644 --- a/src/region.c +++ b/src/region.c @@ -16,8 +16,8 @@ We need this memory layer between the raw OS calls because of: 1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order to reuse memory effectively. 2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper + an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper way to reuse memory. 3. This layer allows for NUMA aware allocation.
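This layer is also where the new `align_offset` parameter gets forwarded: instead of aligning the segment start itself, the allocation is padded so that an interior address (the first user block inside a huge segment) lands on the requested alignment. A sketch of the arithmetic, assuming a power-of-two alignment and a hypothetical `aligned_alloc_fn` standing in for the real OS-level primitive; the matching free recomputes the same `extra` and releases `p - extra`, as `_mi_os_free_aligned` does:

#include <stdint.h>
#include <stddef.h>

static inline size_t align_up(size_t x, size_t align) {  // align must be a power of two
  return (x + align - 1) & ~(align - 1);
}

// Allocate so that (result + align_offset) is `alignment`-aligned.
static void* alloc_at_offset(size_t size, size_t alignment, size_t align_offset,
                             void* (*aligned_alloc_fn)(size_t alignment, size_t size)) {
  const size_t extra = align_up(align_offset, alignment) - align_offset;
  uint8_t* start = (uint8_t*)aligned_alloc_fn(alignment, size + extra);
  if (start == NULL) return NULL;
  uint8_t* p = start + extra;
  // start is alignment-aligned and (extra + align_offset) is a multiple of
  // alignment, hence ((uintptr_t)p + align_offset) % alignment == 0.
  return p;
}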
@@ -59,7 +59,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offs // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map #elif (MI_INTPTR_SIZE==4) #define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map #else @@ -70,11 +70,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offs #define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) #define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) #define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) +#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) -// Region info +// Region info typedef union mi_region_info_u { - size_t value; + size_t value; struct { bool valid; // initialized? bool is_large:1; // allocated in fixed large/huge OS pages @@ -88,7 +88,7 @@ typedef union mi_region_info_u { // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { _Atomic(size_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area + _Atomic(void*) start; // start of the memory area mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block @@ -101,7 +101,7 @@ typedef struct mem_region_s { static mem_region_t regions[MI_REGION_MAX]; // Allocated regions -static _Atomic(size_t) regions_count; // = 0; +static _Atomic(size_t) regions_count; // = 0; /* ---------------------------------------------------------------------------- @@ -136,7 +136,7 @@ mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); + return (start + (bit_idx * MI_SEGMENT_SIZE)); } static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { @@ -205,7 +205,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); mi_atomic_store_ptr_release(void,&r->start, start); - // and share it + // and share it mi_region_info_t info; info.value = 0; // initialize the full union to zero info.x.valid = true; @@ -242,7 +242,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // try all regions for a free slot + // try all regions for a free slot const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though for (size_t visited = 0; visited < count; visited++, idx++) { @@ -276,7 +276,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* return NULL; } } - + // ------------------------------------------------ // found a region and claimed `blocks` at `bit_idx`, initialize them now mi_assert_internal(region != NULL); @@ -288,7 +288,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* mi_assert_internal(!(info.x.is_large && !*large)); mi_assert_internal(start != NULL); - *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); + *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); *large = info.x.is_large; *is_pinned = info.x.is_pinned; *memid = mi_memid_create(region, bit_idx); @@ -307,20 +307,20 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); return NULL; } - if (commit_zero) *is_zero = true; + if (commit_zero) *is_zero = true; } } else { // no need to commit, but check if already fully committed *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); - } + } mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); // unreset reset blocks if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { // some blocks are still reset mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed bool reset_zero = false; @@ -329,13 +329,13 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* } } mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - + #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED if (*commit) { ((uint8_t*)p)[0] = 0; } #endif - - // and return the allocation - mi_assert_internal(p != NULL); + + // and return the allocation + mi_assert_internal(p != NULL); return p; } @@ -354,7 +354,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, *is_zero = false; *is_pinned = false; bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` + if (large==NULL) large = &default_large; // ensure `large != NULL` if (size == 0) return NULL; size = _mi_align_up(size, _mi_os_page_size()); @@ -363,7 +363,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, size_t arena_memid; const size_t blocks = mi_region_block_count(size); if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); + p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); if (p == NULL) { _mi_warning_message("unable to allocate from region: size %zu\n", size); } @@ -428,9 +428,9 @@ void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, s } // reset the blocks to reduce the working set. 
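The reset mentioned here bottoms out in the `mi_os_resetx` logic shown earlier in os.c: prefer MADV_FREE (which lets the kernel reclaim pages lazily) and downgrade permanently to MADV_DONTNEED the first time the kernel rejects it. A sketch, assuming a POSIX system that defines MADV_FREE and omitting the EAGAIN retry loop of the original:

#include <sys/mman.h>
#include <errno.h>
#include <stdatomic.h>
#include <stddef.h>

// One-way switch: starts as MADV_FREE and is downgraded at most once.
static _Atomic(int) reset_advice = MADV_FREE;

static int os_reset(void* p, size_t size) {
  int advice = atomic_load_explicit(&reset_advice, memory_order_relaxed);
  int err = madvise(p, size, advice);
  if (err != 0 && errno == EINVAL && advice == MADV_FREE) {
    // MADV_FREE unsupported (e.g. older Linux kernels): fall back for good.
    atomic_store_explicit(&reset_advice, MADV_DONTNEED, memory_order_release);
    err = madvise(p, size, MADV_DONTNEED);
  }
  return err;
}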
- if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) + if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead + mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); @@ -438,7 +438,7 @@ void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, s _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); } - } + } // and unclaim bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); @@ -467,7 +467,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { memset((void*)®ions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning // and release the whole region mi_atomic_store_release(®ion->info, (size_t)0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats); } diff --git a/src/segment.c b/src/segment.c index 85cac395..c3cb7155 100644 --- a/src/segment.c +++ b/src/segment.c @@ -193,8 +193,8 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_psize == 0); mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_psize, os_psize, protect); #if (MI_SECURE >= 2) - if (segment->capacity == 1) - #endif + if (segment->capacity == 1) + #endif { // and protect the last (or only) page too mi_assert_internal(MI_SECURE <= 1 || segment->page_kind >= MI_PAGE_LARGE); @@ -221,7 +221,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* } } } - #endif + #endif } } @@ -404,11 +404,11 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) size_t adjust = block_size - ((uintptr_t)p % block_size); if (psize - adjust >= block_size) { - if (adjust < block_size) { + if (adjust < block_size) { p += adjust; psize -= adjust; if (pre_size != NULL) *pre_size = adjust; - } + } mi_assert_internal((uintptr_t)p % block_size == 0); } } @@ -481,7 +481,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se } // called by threads that are terminating to free cached segments -void _mi_segment_thread_collect(mi_segments_tld_t* tld) { +void _mi_segment_thread_collect(mi_segments_tld_t* tld) { MI_UNUSED_RELEASE(tld); #if MI_DEBUG>=2 if (!_mi_is_main_thread()) { @@ -567,15 +567,15 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool is_zero = false; - + // Allocate the segment from the OS (segment_size can change due to alignment) mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, pre_size, info_size, &segment_size, &is_zero, &commit, tld, os_tld); 
if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - mi_assert_internal(segment->mem_is_pinned ? segment->mem_is_committed : true); - + mi_assert_internal(segment->mem_is_pinned ? segment->mem_is_committed : true); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - + // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -588,7 +588,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->pages[i].is_committed = commit; segment->pages[i].is_zero_init = is_zero; } - + // initialize segment->page_kind = page_kind; segment->capacity = capacity; @@ -646,7 +646,7 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg // check commit if (!page->is_committed) { mi_assert_internal(!segment->mem_is_pinned); - mi_assert_internal(!page->is_reset); + mi_assert_internal(!page->is_reset); size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -663,7 +663,7 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg // check reset if (page->is_reset) { mi_assert_internal(!segment->mem_is_pinned); - bool ok = mi_page_unreset(segment, page, 0, tld); + bool ok = mi_page_unreset(segment, page, 0, tld); if (!ok) { page->segment_in_use = false; segment->used--; @@ -809,7 +809,7 @@ static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL // Maintain these for debug purposes (these counts may be a bit off) -static mi_decl_cache_align _Atomic(size_t) abandoned_count; +static mi_decl_cache_align _Atomic(size_t) abandoned_count; static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count; // We also maintain a count of current readers of the abandoned list @@ -1086,7 +1086,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, { *reclaimed = false; mi_segment_t* segment; - long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times + long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; bool all_pages_free; @@ -1126,7 +1126,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s { mi_assert_internal(page_kind <= MI_PAGE_LARGE); mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); - + // 1. 
try to reclaim an abandoned segment bool reclaimed; mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); @@ -1220,7 +1220,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, #if MI_HUGE_PAGE_ABANDON segment->thread_id = 0; // huge pages are immediately abandoned mi_segments_track_size(-(long)segment->segment_size, tld); - #endif + #endif mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); @@ -1272,8 +1272,8 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block #endif } -#else -// reset memory of a huge block from another thread +#else +// reset memory of a huge block from another thread void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { mi_assert_internal(segment->page_kind == MI_PAGE_HUGE); mi_assert_internal(segment == _mi_page_segment(page)); @@ -1282,7 +1282,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc if (!segment->mem_is_pinned && page->is_committed) { const size_t usize = mi_usable_size(block) - sizeof(mi_block_t); uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_reset(p, usize, &_mi_stats_main); + _mi_os_reset(p, usize, &_mi_stats_main); } } #endif diff --git a/src/stats.c b/src/stats.c index 1e2fe4d5..c63e5cc4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file static bool mi_is_in_main(void* stat) { return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main - && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); + && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); } static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { @@ -51,7 +51,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { } } -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { +void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { mi_atomic_addi64_relaxed( &stat->count, 1 ); mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); @@ -77,7 +77,7 @@ static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64 mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit); mi_atomic_addi64_relaxed( &stat->current, src->current * unit); mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit); - // peak scores do not work across threads.. + // peak scores do not work across threads.. mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit); } @@ -129,11 +129,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { Display statistics ----------------------------------------------------------- */ -// unit > 0 : size in binary bytes +// unit > 0 : size in binary bytes // unit == 0: count as decimal // unit < 0 : count in binary static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { - char buf[32]; buf[0] = 0; + char buf[32]; buf[0] = 0; int len = 32; const char* suffix = (unit <= 0 ? " " : "B"); const int64_t base = (unit == 0 ? 
1000 : 1024); @@ -146,7 +146,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* } } else { - int64_t divider = base; + int64_t divider = base; const char* magnitude = "K"; if (pos >= divider*base) { divider *= base; magnitude = "M"; } if (pos >= divider*base) { divider *= base; magnitude = "G"; } @@ -208,7 +208,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64 else { mi_print_amount(stat->peak, 1, out, arg); mi_print_amount(stat->allocated, 1, out, arg); - _mi_fprintf(out, arg, "%11s", " "); // no freed + _mi_fprintf(out, arg, "%11s", " "); // no freed mi_print_amount(stat->current, 1, out, arg); _mi_fprintf(out, arg, "\n"); } @@ -225,7 +225,7 @@ static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg } static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { - const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); + const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); @@ -265,7 +265,7 @@ typedef struct buffered_s { mi_output_fun* out; // original output function void* arg; // and state char* buf; // local buffer of at least size `count+1` - size_t used; // currently used chars `used <= count` + size_t used; // currently used chars `used <= count` size_t count; // total chars available for output } buffered_t; @@ -336,7 +336,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); - + mi_msecs_t elapsed; mi_msecs_t user_time; mi_msecs_t sys_time; @@ -354,7 +354,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) _mi_fprintf(out, arg, ", commit: "); mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); } - _mi_fprintf(out, arg, "\n"); + _mi_fprintf(out, arg, "\n"); } static mi_msecs_t mi_process_start; // = 0 @@ -414,7 +414,7 @@ static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { mfreq.QuadPart = f.QuadPart/1000LL; if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; } - return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); } mi_msecs_t _mi_clock_now(void) { @@ -429,7 +429,7 @@ mi_msecs_t _mi_clock_now(void) { struct timespec t; #ifdef CLOCK_MONOTONIC clock_gettime(CLOCK_MONOTONIC, &t); - #else + #else clock_gettime(CLOCK_REALTIME, &t); #endif return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); @@ -476,7 +476,7 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { return msecs; } -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) { *elapsed = _mi_clock_end(mi_process_start); FILETIME ct; @@ -492,7 +492,7 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec *peak_rss = (size_t)info.PeakWorkingSetSize; *current_commit = 
(size_t)info.PagefileUsage; *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; + *page_faults = (size_t)info.PageFaultCount; } #elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) @@ -525,7 +525,7 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec // estimate commit using our stats *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate + *current_rss = *current_commit; // estimate #if defined(__HAIKU__) // Haiku does not have (yet?) a way to // get these stats per process @@ -546,7 +546,7 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec } #else *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB -#endif +#endif } #else @@ -578,7 +578,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s size_t peak_rss0 = 0; size_t current_commit0 = 0; size_t peak_commit0 = 0; - size_t page_faults0 = 0; + size_t page_faults0 = 0; mi_stat_process_info(&elapsed,&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); @@ -589,4 +589,3 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s if (peak_commit!=NULL) *peak_commit = peak_commit0; if (page_faults!=NULL) *page_faults = page_faults0; } - diff --git a/test/main-override-static.c b/test/main-override-static.c index dbd0fab8..fcdea4d3 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -34,14 +34,14 @@ int main() { // negative_stat(); // test_heap_walk(); // alloc_huge(); - + void* p1 = malloc(78); void* p2 = malloc(24); free(p1); p1 = mi_malloc(8); char* s = strdup("hello\n"); free(p2); - + p2 = malloc(16); p1 = realloc(p1, 32); free(p1); @@ -53,10 +53,10 @@ int main() { //free(p1); //p2 = malloc(32); //mi_free(p2); - + //mi_collect(true); //mi_stats_print(NULL); - + // test_process_info(); return 0; } @@ -153,7 +153,7 @@ static void test_process_info(void) { size_t peak_rss = 0; size_t current_commit = 0; size_t peak_commit = 0; - size_t page_faults = 0; + size_t page_faults = 0; for (int i = 0; i < 100000; i++) { void* p = calloc(100,10); free(p); @@ -185,7 +185,7 @@ static void negative_stat(void) { mi_stats_print_out(NULL, NULL); *p = 100; mi_free(p); - mi_stats_print_out(NULL, NULL); + mi_stats_print_out(NULL, NULL); } static void alloc_huge(void) { @@ -383,4 +383,3 @@ static void mi_bins(void) { } } #endif - diff --git a/test/main-override.cpp b/test/main-override.cpp index 63bf20d8..37d4daae 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -50,7 +50,7 @@ int main() { padding_shrink(); various_tests(); tsan_numa_test(); - strdup_test(); + strdup_test(); test_stl_allocators(); test_mt_shutdown(); */ @@ -286,7 +286,7 @@ static void fail_aslr() { // issues #414 static void dummy_worker() { void* p = mi_malloc(0); - mi_free(p); + mi_free(p); } static void tsan_numa_test() { diff --git a/test/test-api-fill.c b/test/test-api-fill.c index c205637c..85d8524f 100644 --- a/test/test-api-fill.c +++ 
b/test/test-api-fill.c @@ -328,7 +328,7 @@ bool check_debug_fill_freed(uint8_t* p, size_t size) { #if MI_VALGRIND (void)p; (void)size; return true; // when compiled with valgrind we don't fill on purpose -#else +#else if(!p) return false; @@ -337,6 +337,6 @@ bool check_debug_fill_freed(uint8_t* p, size_t size) { result &= p[i] == MI_DEBUG_FREED; } return result; -#endif +#endif } #endif diff --git a/test/test-api.c b/test/test-api.c index c6d289de..e47bc1e4 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -57,7 +57,7 @@ int main(void) { // --------------------------------------------------- CHECK_BODY("malloc-zero") { - void* p = mi_malloc(0); + void* p = mi_malloc(0); result = (p != NULL); mi_free(p); }; @@ -83,7 +83,7 @@ int main(void) { // --------------------------------------------------- // Extended - // --------------------------------------------------- + // --------------------------------------------------- CHECK_BODY("posix_memalign1") { void* p = &p; int err = mi_posix_memalign(&p, sizeof(void*), 32); @@ -122,7 +122,7 @@ int main(void) { void* p = mi_malloc_aligned(48,32); result = (p != NULL && (uintptr_t)(p) % 32 == 0); mi_free(p); }; CHECK_BODY("malloc-aligned3") { - void* p1 = mi_malloc_aligned(48,32); bool result1 = (p1 != NULL && (uintptr_t)(p1) % 32 == 0); + void* p1 = mi_malloc_aligned(48,32); bool result1 = (p1 != NULL && (uintptr_t)(p1) % 32 == 0); void* p2 = mi_malloc_aligned(48,32); bool result2 = (p2 != NULL && (uintptr_t)(p2) % 32 == 0); mi_free(p2); mi_free(p1); @@ -138,9 +138,9 @@ int main(void) { result = ok; }; CHECK_BODY("malloc-aligned5") { - void* p = mi_malloc_aligned(4097,4096); - size_t usable = mi_usable_size(p); - result = (usable >= 4097 && usable < 16000); + void* p = mi_malloc_aligned(4097,4096); + size_t usable = mi_usable_size(p); + result = (usable >= 4097 && usable < 16000); printf("malloc_aligned5: usable size: %zi\n", usable); mi_free(p); }; @@ -187,18 +187,18 @@ int main(void) { } for (int j = 0; j < 8; j++) { mi_free(p[j]); - } + } } result = ok; }; CHECK_BODY("malloc-aligned10") { bool ok = true; void* p[10+1]; - int align; + int align; int j; for(j = 0, align = 1; j <= 10 && ok; align *= 2, j++ ) { p[j] = mi_malloc_aligned(43 + align, align); - ok = ((uintptr_t)p[j] % align) == 0; + ok = ((uintptr_t)p[j] % align) == 0; } for ( ; j > 0; j--) { mi_free(p[j-1]); @@ -216,7 +216,7 @@ int main(void) { }; CHECK_BODY("malloc-aligned-at2") { void* p = mi_malloc_aligned_at(50,32,8); result = (p != NULL && ((uintptr_t)(p) + 8) % 32 == 0); mi_free(p); - }; + }; CHECK_BODY("memalign1") { void* p; bool ok = true; @@ -226,7 +226,7 @@ int main(void) { } result = ok; }; - + // --------------------------------------------------- // Reallocation // --------------------------------------------------- diff --git a/test/test-stress.c b/test/test-stress.c index 9033e223..298d48b0 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -218,7 +218,7 @@ static void test_leak(void) { } #endif -int main(int argc, char** argv) { +int main(int argc, char** argv) { // > mimalloc-test-stress [THREADS] [SCALE] [ITER] if (argc >= 2) { char* end; @@ -256,7 +256,7 @@ int main(int argc, char** argv) { mi_collect(true); #endif mi_stats_print(NULL); -#endif +#endif //bench_end_program(); return 0; } diff --git a/test/test-wrong.c b/test/test-wrong.c index 6c3d5a74..17d253b6 100644 --- a/test/test-wrong.c +++ b/test/test-wrong.c @@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file > cmake ../.. 
-DMI_VALGRIND=1 > make -j8 - and then compile this file as: + and then compile this file as: > gcc -g -o test-wrong -I../../include ../../test/test-wrong.c libmimalloc-valgrind-debug.a -lpthread @@ -32,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file int main(int argc, char** argv) { int* p = (int*)mi(malloc)(3*sizeof(int)); - + int* r = (int*)mi_malloc_aligned(8,16); mi_free(r); @@ -47,7 +47,7 @@ int main(int argc, char** argv) { // illegal int read printf("invalid: over: %d, under: %d\n", q[1], q[-1]); - + *q = 42; // buffer overflow @@ -55,7 +55,7 @@ int main(int argc, char** argv) { // buffer underflow q[-1] = 44; - + mi(free)(q); // double free @@ -66,5 +66,5 @@ int main(int argc, char** argv) { // leak p // mi_free(p) - return 0; + return 0; } \ No newline at end of file diff --git a/test/testhelper.h b/test/testhelper.h index 44776b74..a9727584 100644 --- a/test/testhelper.h +++ b/test/testhelper.h @@ -19,12 +19,12 @@ static int failed = 0; static bool check_result(bool result, const char* testname, const char* fname, long lineno) { if (!(result)) { - failed++; + failed++; fprintf(stderr,"\n FAILED: %s: %s:%ld\n", testname, fname, lineno); - /* exit(1); */ - } - else { - ok++; + /* exit(1); */ + } + else { + ok++; fprintf(stderr, "ok.\n"); } return true; From 2c40e0d02fc4633441bc7622d147488b1b9fd62b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 18 Oct 2022 11:03:03 +0200 Subject: [PATCH 66/88] Allow running in Windows Nano Server containers The `GetNumaProcessorNode()` symbol is not defined in Nano Server's DLLs (because that function is long deprecated). This causes problems with any executable that uses mimalloc when trying to run on Nano Server. Instead of importing this function statically, try to import it dynamically, and fall back gracefully if it cannot be loaded. 
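For illustration, the run-time import pattern (a minimal standalone sketch, not the patch itself) looks roughly like this; the typedef matches the one added below, while the wrapper names and the explicit kernel32 handle are assumptions of this sketch:

    #include <windows.h>

    // typedef mirrors the one this patch adds to os.c
    typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber);
    static PGetNumaProcessorNode pGetNumaProcessorNode = NULL;

    static void mi_resolve_numa_api(void) {   // hypothetical helper name
      HINSTANCE hDll = LoadLibrary(TEXT("kernel32.dll"));
      if (hDll != NULL) {
        // on Nano Server this simply yields NULL instead of preventing the
        // loader from starting the executable (as a static import would)
        pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode");
        FreeLibrary(hDll);  // kernel32 itself stays mapped; this only drops our reference
      }
    }

    static size_t mi_numa_node_or_zero(void) {  // hypothetical wrapper: fall back to node 0
      if (pGetNumaProcessorNode == NULL) return 0;
      UCHAR node = 0;
      BOOL ok = pGetNumaProcessorNode((UCHAR)GetCurrentProcessorNumber(), &node);
      return (ok ? (size_t)node : 0);
    }

The key difference from a static import is that a missing export merely leaves the function pointer NULL instead of making the executable fail to load at all.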
Signed-off-by: Johannes Schindelin --- src/os.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index 6a45c6b9..df1088cc 100644 --- a/src/os.c +++ b/src/os.c @@ -177,9 +177,11 @@ typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; +static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; static bool mi_win_enable_large_os_pages(void) { @@ -245,6 +247,7 @@ void _mi_os_init(void) pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); FreeLibrary(hDll); } if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { @@ -1348,11 +1351,11 @@ static size_t mi_os_numa_nodex(void) { BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); if (ok) numa_node = nnode; } - else { + else if (pGetNumaProcessorNode != NULL) { // Vista or earlier, use older API that is limited to 64 processors. Issue #277 DWORD pnum = GetCurrentProcessorNumber(); UCHAR nnode = 0; - BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode); + BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); if (ok) numa_node = nnode; } return numa_node; From 11cab6a0c07b7af6c0531a4f6f6df9d10f5557f3 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Aug 2022 13:41:53 +0200 Subject: [PATCH 67/88] Avoid `%z` format This format is not supported by MSVC runtimes targeted by the mingw-w64 toolchain. Signed-off-by: Johannes Schindelin --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index eead6450..f7cf29cf 100644 --- a/src/options.c +++ b/src/options.c @@ -347,7 +347,7 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { char tprefix[64]; - snprintf(tprefix, sizeof(tprefix), "%sthread 0x%zx: ", prefix, _mi_thread_id()); + snprintf(tprefix, sizeof(tprefix), "%sthread 0x%llx: ", prefix, (unsigned long long)_mi_thread_id()); mi_vfprintf(out, arg, tprefix, fmt, args); } else { From 948a0c44df4b3a609c9add162aaff28049e5e567 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 20 Oct 2019 22:05:35 +0200 Subject: [PATCH 68/88] Lazy-load `psapi.dll` Let's load the `GetProcessMemoryInfo()` function dynamically. When needed. If needed. This is necessary because the start-up cost spent on loading dynamic libraries is non-negligible.
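Sketched out (with hypothetical helper and flag names; the real code is in the diff below), the on-demand pattern caches the function pointer on first use:

    #include <windows.h>

    // PROCESS_MEMORY_COUNTERS definition omitted in this sketch; the patch declares it locally.
    typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, void*, DWORD);
    static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL;
    static int mi_psapi_probed = 0;  // hypothetical flag name

    static PGetProcessMemoryInfo mi_get_process_memory_info_fn(void) {  // hypothetical helper
      if (!mi_psapi_probed) {
        mi_psapi_probed = 1;  // probe at most once, even if loading fails
        HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll"));  // paid on first use, not at start-up
        if (hDll != NULL) {
          pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo");
        }
        // hDll is deliberately not freed -- see the note below
      }
      return pGetProcessMemoryInfo;  // may be NULL; callers then report zeros
    }

Using a separate flag (rather than the pointer itself) ensures that a failed LoadLibrary is not retried on every call.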
Note: In contrast to how `os.c` loads libraries and obtains function addresses, we cannot call `FreeLibrary(hDll)` here because that would unload the `psapi` library before we want to use it. Signed-off-by: Johannes Schindelin --- src/stats.c | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/stats.c b/src/stats.c index c63e5cc4..5170a68b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -465,8 +465,6 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { #if defined(_WIN32) #include <windows.h> -#include <psapi.h> -#pragma comment(lib,"psapi.lib") static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; @@ -476,6 +474,23 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { return msecs; } +typedef struct _PROCESS_MEMORY_COUNTERS { + DWORD cb; + DWORD PageFaultCount; + SIZE_T PeakWorkingSetSize; + SIZE_T WorkingSetSize; + SIZE_T QuotaPeakPagedPoolUsage; + SIZE_T QuotaPagedPoolUsage; + SIZE_T QuotaPeakNonPagedPoolUsage; + SIZE_T QuotaNonPagedPoolUsage; + SIZE_T PagefileUsage; + SIZE_T PeakPagefileUsage; +} PROCESS_MEMORY_COUNTERS; +typedef PROCESS_MEMORY_COUNTERS* PPROCESS_MEMORY_COUNTERS; +typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); +static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; +static int GetProcessMemoryInfo_is_initialized = 0; + static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) { *elapsed = _mi_clock_end(mi_process_start); @@ -487,12 +502,25 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec *utime = filetime_msecs(&ut); *stime = filetime_msecs(&st); PROCESS_MEMORY_COUNTERS info; - GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; + + if (!GetProcessMemoryInfo_is_initialized) { + HINSTANCE hDll; + hDll = LoadLibrary(TEXT("psapi.dll")); + if (hDll != NULL) { + pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); + } + GetProcessMemoryInfo_is_initialized = 1; + } + if (pGetProcessMemoryInfo) { + pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + *current_rss = (size_t)info.WorkingSetSize; + *peak_rss = (size_t)info.PeakWorkingSetSize; + *current_commit = (size_t)info.PagefileUsage; + *peak_commit = (size_t)info.PeakPagefileUsage; + *page_faults = (size_t)info.PageFaultCount; + } else { + *current_rss = *peak_rss = *current_commit = *peak_commit = *page_faults = 0; + } } #elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) From edb181c377b88e7b87fe3efeb1eed10d1ed87eb7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Aug 2022 13:49:22 +0200 Subject: [PATCH 69/88] Lazy-link to `bcrypt` Let's not make `bcrypt.dll` a link-time bound library. Instead, load the `BCryptGenRandom()` function dynamically. When needed. If needed. This helps reduce the start-up cost of any mimalloc user because the time spent on loading dynamic libraries is non-negligible.
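In the same vein, a simplified sketch of the dynamic call (hypothetical wrapper name; unlike the patch below, it uses the pointer itself as the initialization flag, so a failed load would be re-probed):

    #include <windows.h>
    #include <stdbool.h>

    #ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG
    #define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002  // documented flag value
    #endif

    typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG);
    static PBCryptGenRandom pBCryptGenRandom = NULL;

    static bool mi_random_buf(void* buf, size_t buf_len) {  // hypothetical name
      if (pBCryptGenRandom == NULL) {
        HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll"));
        if (hDll != NULL) {
          pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom");
        }
        // deliberately no FreeLibrary: the cached pointer must remain callable
      }
      if (pBCryptGenRandom == NULL) return false;  // caller can fall back to another source
      return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);  // NTSTATUS >= 0 means success
    }

The patch below additionally guards the probe with an is_initialized flag so that a missing DLL is looked up only once.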
Note: In contrast to how `os.c` loads libraries and obtains function addresses, we cannot call `FreeLibrary(hDll)` here because that would unload the `bcrypt` library before we want to use it. Signed-off-by: Johannes Schindelin --- src/random.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/random.c b/src/random.c index bf7e5b83..7e0bdd9f 100644 --- a/src/random.c +++ b/src/random.c @@ -187,10 +187,27 @@ static bool os_random_buf(void* buf, size_t buf_len) { return (RtlGenRandom(buf, (ULONG)buf_len) != 0); } #else -#pragma comment (lib,"bcrypt.lib") -#include <bcrypt.h> + +#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG +#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 +#endif + +typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); +static PBCryptGenRandom pBCryptGenRandom = NULL; +static int BCryptGenRandom_is_initialized = 0; + static bool os_random_buf(void* buf, size_t buf_len) { - return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); + if (!BCryptGenRandom_is_initialized) { + HINSTANCE hDll; + hDll = LoadLibrary(TEXT("bcrypt.dll")); + if (hDll != NULL) { + pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); + } + BCryptGenRandom_is_initialized = 1; + } + if (!pBCryptGenRandom) + return 0; + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); } #endif From 089e85b1132d8a10b17196f5a4dd970a5c6efcca Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Aug 2022 14:23:36 +0200 Subject: [PATCH 70/88] Make MIMALLOC_SHOW_STATS work with redirected `stderr` Setting `MIMALLOC_SHOW_STATS` to ask mimalloc to print out something after the process is done is the easiest way to verify that a mimalloc-enabled Git is running. So it had better work and not try to write to a Win32 Console when it was given a regular file handle instead or, as is the case in Git for Windows' regular Git Bash window, an emulated pseudo terminal. Signed-off-by: Johannes Schindelin --- src/options.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/options.c b/src/options.c index f7cf29cf..d467459b 100644 --- a/src/options.c +++ b/src/options.c @@ -180,8 +180,15 @@ static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; + static int write_to_console; if (hcon == INVALID_HANDLE_VALUE) { + CONSOLE_SCREEN_BUFFER_INFO sbi; hcon = GetStdHandle(STD_ERROR_HANDLE); + write_to_console = GetConsoleScreenBufferInfo(hcon, &sbi) ?
1 : 0; + } + if (!write_to_console) { + fputs(msg, stderr); + return; } const size_t len = strlen(msg); if (hcon != INVALID_HANDLE_VALUE && len > 0 && len < UINT32_MAX) { From 505a14bbd82d35d7f21eaa283339f76c27c69501 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 16:50:13 -0800 Subject: [PATCH 71/88] cleanup on-demand loading of psapi and bcrypt on windows --- src/random.c | 18 +++++++++--------- src/stats.c | 31 +++++++++++++++---------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/random.c b/src/random.c index 7e0bdd9f..06d4ba4a 100644 --- a/src/random.c +++ b/src/random.c @@ -193,21 +193,21 @@ static bool os_random_buf(void* buf, size_t buf_len) { #endif typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); -static PBCryptGenRandom pBCryptGenRandom = NULL; -static int BCryptGenRandom_is_initialized = 0; +static PBCryptGenRandom pBCryptGenRandom = NULL; static bool os_random_buf(void* buf, size_t buf_len) { - if (!BCryptGenRandom_is_initialized) { - HINSTANCE hDll; - hDll = LoadLibrary(TEXT("bcrypt.dll")); + if (pBCryptGenRandom == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); if (hDll != NULL) { pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); } - BCryptGenRandom_is_initialized = 1; } - if (!pBCryptGenRandom) - return 0; - return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); + if (pBCryptGenRandom == NULL) { + return false; + } + else { + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); + } } #endif diff --git a/src/stats.c b/src/stats.c index 5170a68b..363c4400 100644 --- a/src/stats.c +++ b/src/stats.c @@ -489,7 +489,6 @@ typedef struct _PROCESS_MEMORY_COUNTERS { typedef PROCESS_MEMORY_COUNTERS* PPROCESS_MEMORY_COUNTERS; typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; -static int GetProcessMemoryInfo_is_initialized = 0; static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) { @@ -501,26 +500,26 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); *utime = filetime_msecs(&ut); *stime = filetime_msecs(&st); - PROCESS_MEMORY_COUNTERS info; - - if (!GetProcessMemoryInfo_is_initialized) { - HINSTANCE hDll; - hDll = LoadLibrary(TEXT("psapi.dll")); + + // load psapi on demand + if (pGetProcessMemoryInfo == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); if (hDll != NULL) { pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); } - GetProcessMemoryInfo_is_initialized = 1; } - if (pGetProcessMemoryInfo) { + + // get process info + PROCESS_MEMORY_COUNTERS info; + memset(&info, 0, sizeof(info)); + if (pGetProcessMemoryInfo != NULL) { pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; - } else { - *current_rss = *peak_rss = *current_commit = *peak_commit = *page_faults = 0; - } + } + *current_rss = (size_t)info.WorkingSetSize; + *peak_rss = 
(size_t)info.PeakWorkingSetSize; + *current_commit = (size_t)info.PagefileUsage; + *peak_commit = (size_t)info.PeakPagefileUsage; + *page_faults = (size_t)info.PageFaultCount; } #elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) From dfc95f34c480c6051ea8587eb97938d42b5051ee Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 16:57:50 -0800 Subject: [PATCH 72/88] first try WriteFile for a redirected stderr --- src/options.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/options.c b/src/options.c index d467459b..669f57d0 100644 --- a/src/options.c +++ b/src/options.c @@ -180,20 +180,26 @@ static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; - static int write_to_console; + static bool hconIsConsole; if (hcon == INVALID_HANDLE_VALUE) { CONSOLE_SCREEN_BUFFER_INFO sbi; hcon = GetStdHandle(STD_ERROR_HANDLE); - write_to_console = GetConsoleScreenBufferInfo(hcon, &sbi) ? 1 : 0; - } - if (!write_to_console) { - fputs(msg, stderr); - return; + hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); } const size_t len = strlen(msg); - if (hcon != INVALID_HANDLE_VALUE && len > 0 && len < UINT32_MAX) { + if (len > 0 && len < UINT32_MAX) { DWORD written = 0; - WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + if (hconIsConsole) { + WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + } + else if (hcon != INVALID_HANDLE_VALUE) { + // use direct write if stderr was redirected + WriteFile(hcon, msg, (DWORD)len, &written, NULL); + } + else { + // finally fall back to fputs after all + fputs(msg, stderr); + } } } #else From 4f2fdf76a0393ce8dabc3f0b2585610ddaceaad7 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 16:59:24 -0800 Subject: [PATCH 73/88] add braces --- src/os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index df1088cc..5277e5e4 100644 --- a/src/os.c +++ b/src/os.c @@ -1349,14 +1349,14 @@ static size_t mi_os_numa_nodex(void) { (*pGetCurrentProcessorNumberEx)(&pnum); USHORT nnode = 0; BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); - if (ok) numa_node = nnode; + if (ok) { numa_node = nnode; } } else if (pGetNumaProcessorNode != NULL) { // Vista or earlier, use older API that is limited to 64 processors. 
Issue #277 DWORD pnum = GetCurrentProcessorNumber(); UCHAR nnode = 0; BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) numa_node = nnode; + if (ok) { numa_node = nnode; } } return numa_node; } From 8c89a77064d68c7a534604ef98f39fff9a8239d6 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 17:25:49 -0800 Subject: [PATCH 74/88] add default constructors to stl heap allocators --- include/mimalloc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index a064645f..bce3de93 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -478,7 +478,7 @@ template struct _mi_heap_stl_allocator_common : public _m using typename _mi_stl_allocator_common::value_type; using typename _mi_stl_allocator_common::pointer; - _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete or destroy the passed in heap */ + _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */ #if (__cplusplus >= 201703L) // C++17 mi_decl_nodiscard T* allocate(size_type count) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } @@ -513,7 +513,8 @@ private: // STL allocator allocation in a specific heap template struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; - mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } /* no delete or destroy on the passed in heap */ + mi_heap_stl_allocator() : _mi_heap_stl_allocator_common() { } // creates fresh heap that is deleted when the destructor is called + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap template mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } @@ -529,6 +530,8 @@ template bool operator!=(const mi_heap_stl_allocator& x, // the heap is destroyed in one go on destruction -- use with care! 
template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; + mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common() { } // creates fresh heap that is destroyed when the destructor is called + mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } From 2b0421a25cbb03c2ce015cb460d04f4766c99219 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 17:38:39 -0800 Subject: [PATCH 75/88] fix -Wunused-function for mi_strnicmp; originally by @rui314 in PR #662 but rebased to dev branch --- src/options.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/options.c b/src/options.c index 669f57d0..44319a42 100644 --- a/src/options.c +++ b/src/options.c @@ -494,13 +494,6 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { return false; } #else -static inline int mi_strnicmp(const char* s, const char* t, size_t n) { - if (n==0) return 0; - for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { - if (toupper(*s) != toupper(*t)) break; - } - return (n==0 ? 0 : *s - *t); -} #if defined _WIN32 // On Windows use GetEnvironmentVariable instead of getenv to work // reliably even when this is invoked before the C runtime is initialized. @@ -526,6 +519,13 @@ static char** mi_get_environ(void) { return environ; } #endif +static int mi_strnicmp(const char* s, const char* t, size_t n) { + if (n == 0) return 0; + for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { + if (toupper(*s) != toupper(*t)) break; + } + return (n == 0 ?
0 : *s - *t); +} static bool mi_getenv(const char* name, char* result, size_t result_size) { if (name==NULL) return false; const size_t len = strlen(name); From 95b61b8c3d9b4e19191507592a18fb9fe31fb53b Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 17:45:15 -0800 Subject: [PATCH 76/88] forward libc interface used by wasi-libc; originally by @anuraaga in PR #667 but rebased to the dev branch --- src/alloc-override.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index ca508aa6..84a0d19d 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -262,7 +262,15 @@ int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } -#if defined(__GLIBC__) && defined(__linux__) +#if defined(__wasi__) + // forward __libc interface (see PR #667) + void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc, size) + void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc, count, size) + void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc, p, size) + void __libc_free(void* p) MI_FORWARD0(mi_free, p) + void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } + +#elif defined(__GLIBC__) && defined(__linux__) // forward __libc interface (needed for glibc-based Linux distributions) void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size) void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size) From 4d1e74aaddd3d4137ce72ead3466c9a88f9b1a2c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 18:01:58 -0800 Subject: [PATCH 77/88] add comment --- include/mimalloc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/mimalloc.h b/include/mimalloc.h index bce3de93..3e6ba7c3 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -282,6 +282,7 @@ mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allo mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; #if MI_MALLOC_VERSION >= 200 +// Create a heap that only allocates in the specified arena mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif From ab99eb5830ba521dd515808b0f880bd84f5b6281 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 18:12:27 -0800 Subject: [PATCH 78/88] quote path in post-build event (issue #666) --- ide/vs2019/mimalloc-override.vcxproj | 8 ++++---- ide/vs2022/mimalloc-override.vcxproj | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 182ddab1..5fa59569 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -110,7 +110,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect32.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" Copy mimalloc-redirect32.dll to the output directory @@ -138,7 +138,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" copy mimalloc-redirect.dll to the output directory @@ -170,7 +170,7 @@ false - COPY /Y
$(ProjectDir)..\..\bin\mimalloc-redirect32.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" Copy mimalloc-redirect32.dll to the output directory @@ -202,7 +202,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" copy mimalloc-redirect.dll to the output directory diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 87b0a1e4..e7133af4 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -110,7 +110,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect32.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" Copy mimalloc-redirect32.dll to the output directory @@ -138,7 +138,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" copy mimalloc-redirect.dll to the output directory @@ -170,7 +170,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect32.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" Copy mimalloc-redirect32.dll to the output directory @@ -202,7 +202,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" copy mimalloc-redirect.dll to the output directory From b701d4cbfa132fe6c79c3f5580842e0c46e9cfc3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 18:39:52 -0800 Subject: [PATCH 79/88] bump cmake minimum version to 3.13 to support CMAKE_INTERPROCEDURAL_OPTIMIZATION -- see PR 656. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 97a4984a..0011b874 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.13) project(libmimalloc C CXX) set(CMAKE_C_STANDARD 11) From 94b8cb870e1d459d4617d5f558e955fa48e6443c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 19 Dec 2022 18:40:12 -0800 Subject: [PATCH 80/88] quote path in post-build event (issue #666) --- ide/vs2017/mimalloc-override.vcxproj | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index a1266dc9..1c6a8fda 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -110,7 +110,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect32.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" Copy mimalloc-redirect32.dll to the output directory @@ -138,7 +138,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" copy mimalloc-redirect.dll to the output directory @@ -170,7 +170,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect32.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" Copy mimalloc-redirect32.dll to the output directory @@ -202,7 +202,7 @@ false - COPY /Y $(ProjectDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" copy mimalloc-redirect.dll to the output directory From ef3f651f3faeb8a9dfd95b4a82a07e263a146c35 Mon Sep 17 00:00:00 2001 From: Daan
Leijen Date: Mon, 19 Dec 2022 18:59:33 -0800 Subject: [PATCH 81/88] change implementation of mi_realpath to be more robust; see issue #660 --- src/alloc.c | 22 ++++++++++++++++++---- test/main-override.cpp | 1 + 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 6e468c85..554405f1 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -781,7 +781,9 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const c if (s == NULL) return NULL; size_t n = strlen(s); char* t = (char*)mi_heap_malloc(heap,n+1); - if (t != NULL) _mi_memcpy(t, s, n + 1); + if (t == NULL) return NULL; + _mi_memcpy(t, s, n); + t[n] = 0; return t; } @@ -832,6 +834,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const } #else #include <unistd.h> // pathconf +/* static size_t mi_path_max(void) { static size_t path_max = 0; if (path_max <= 0) { @@ -842,20 +845,31 @@ static size_t mi_path_max(void) { } return path_max; } - +*/ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { if (resolved_name != NULL) { return realpath(fname,resolved_name); } else { - size_t n = mi_path_max(); + char* rname = realpath(fname, NULL); + if (rname == NULL) return NULL; + char* result = mi_heap_strdup(heap, rname); + free(rname); // use regular free! (which may be redirected to our free but that's ok) + return result; + } + /* + const size_t n = mi_path_max(); char* buf = (char*)mi_malloc(n+1); - if (buf==NULL) return NULL; + if (buf == NULL) { + errno = ENOMEM; + return NULL; + } char* rname = realpath(fname,buf); char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL` mi_free(buf); return result; } + */ } #endif diff --git a/test/main-override.cpp b/test/main-override.cpp index 37d4daae..e63d605a 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -39,6 +39,7 @@ static void heap_thread_free_huge(); static void test_stl_allocators(); + int main() { mi_stats_reset(); // ignore earlier allocations From 0f796a56a98b224dc645cd2260b95e06139570cf Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 20 Dec 2022 18:59:55 -0800 Subject: [PATCH 82/88] fix bug where eagerly committed memory would be immediately decommitted; possible fix for issue #669 --- src/alloc.c | 4 ++-- src/segment.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 7bea69e9..b8270f1a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -959,7 +959,7 @@ static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { } -mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { void* p = mi_heap_malloc(heap,size); if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; @@ -970,7 +970,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { } -mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { size_t total; if mi_unlikely(mi_count_size_overflow(count, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc diff --git a/src/segment.c b/src/segment.c index 184197ef..5b4dbc7a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -574,7 +574,7 @@ static bool
mi_segment_is_abandoned(mi_segment_t* segment) { } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? NULL : mi_span_queue_for(slice_count,tld)); @@ -594,7 +594,9 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); + if (allow_decommit) { + mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + } // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); @@ -656,12 +658,12 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } // and add the new free page - mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld); + mi_segment_span_free(segment, mi_slice_index(slice), slice_count, true, tld); return slice; } -static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_segment(slice)==segment); mi_assert_internal(slice->slice_count >= slice_count); mi_assert_internal(slice->xblock_size > 0); // no more in free queue @@ -669,7 +671,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(slice) + slice_count; size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, tld); + mi_segment_span_free(segment, next_index, next_count, allow_decommit, tld); slice->slice_count = (uint32_t)slice_count; } @@ -738,7 +740,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren mi_span_queue_delete(sq, slice); if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, tld); + mi_segment_slice_split(segment, slice, slice_count, false /* don't decommit */, tld); } mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); @@ -872,7 +874,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); if (segment->allow_decommit) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); segment->decommit_mask = decommit_mask; mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); #if MI_DEBUG>2 @@ -919,7 +921,7 @@ static mi_segment_t* 
mi_segment_alloc(size_t required, size_t page_alignment, mi // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); } else { mi_assert_internal(huge_page!=NULL); From aa2d00fde86a54abd6439c60daec7e5316241f91 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 21 Dec 2022 10:28:58 -0800 Subject: [PATCH 83/88] make heap_new inline again --- src/alloc.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 554405f1..472170dc 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -614,25 +614,6 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { } -// ------------------------------------------------------ -// ensure explicit external inline definitions are emitted! -// ------------------------------------------------------ - -#ifdef __cplusplus -void* _mi_externs[] = { - (void*)&_mi_page_malloc, - (void*)&_mi_heap_malloc_zero, - (void*)&_mi_heap_malloc_zero_ex, - (void*)&mi_malloc, - (void*)&mi_malloc_small, - (void*)&mi_zalloc_small, - (void*)&mi_heap_malloc, - (void*)&mi_heap_zalloc, - (void*)&mi_heap_malloc_small -}; -#endif - - // ------------------------------------------------------ // Allocation extensions // ------------------------------------------------------ @@ -954,7 +935,7 @@ static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { } -mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { +mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { void* p = mi_heap_malloc(heap,size); if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; @@ -965,7 +946,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { } -mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { +mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { size_t total; if mi_unlikely(mi_count_size_overflow(count, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc @@ -1023,3 +1004,23 @@ mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) { return mi_new_realloc(p, total); } } + +// ------------------------------------------------------ +// ensure explicit external inline definitions are emitted! 
+// ------------------------------------------------------ + +#ifdef __cplusplus +void* _mi_externs[] = { + (void*)&_mi_page_malloc, + (void*)&_mi_heap_malloc_zero, + (void*)&_mi_heap_malloc_zero_ex, + (void*)&mi_malloc, + (void*)&mi_malloc_small, + (void*)&mi_zalloc_small, + (void*)&mi_heap_malloc, + (void*)&mi_heap_zalloc, + (void*)&mi_heap_malloc_small, + (void*)&mi_heap_alloc_new, + (void*)&mi_heap_alloc_new_n +}; +#endif From 37d5ce94983e7103c9a0e0752db229aa60e7b0f7 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 21 Dec 2022 12:09:52 -0800 Subject: [PATCH 84/88] comment out include --- src/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 472170dc..18261fbf 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -814,8 +814,8 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const } } #else -#include <unistd.h> // pathconf /* +#include <unistd.h> // pathconf static size_t mi_path_max(void) { static size_t path_max = 0; if (path_max <= 0) { From d1fff1119a52e15c1c3807efe1077024c39fe70e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 21 Dec 2022 12:19:09 -0800 Subject: [PATCH 85/88] reorganize span free code --- src/segment.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/segment.c b/src/segment.c index 5b4dbc7a..dc98e3e7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -406,7 +406,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { /* ----------------------------------------------------------- - Span management + Commit/Decommit ranges ----------------------------------------------------------- */ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { @@ -569,6 +569,10 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st } +/* ----------------------------------------------------------- + Span free +----------------------------------------------------------- */ + static bool mi_segment_is_abandoned(mi_segment_t* segment) { return (segment->thread_id == 0); } @@ -663,17 +667,10 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } - -static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { - mi_assert_internal(_mi_ptr_segment(slice)==segment); - mi_assert_internal(slice->slice_count >= slice_count); - mi_assert_internal(slice->xblock_size > 0); // no more in free queue - if (slice->slice_count <= slice_count) return; - mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - size_t next_index = mi_slice_index(slice) + slice_count; - size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, allow_decommit, tld); - slice->slice_count = (uint32_t)slice_count; -} + +/* ----------------------------------------------------------- + Page allocation +----------------------------------------------------------- */ // Note: may still return NULL if committing the memory failed static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { @@ -725,6 +722,18 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i return page; } +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { +
mi_assert_internal(_mi_ptr_segment(slice) == segment); + mi_assert_internal(slice->slice_count >= slice_count); + mi_assert_internal(slice->xblock_size > 0); // no more in free queue + if (slice->slice_count <= slice_count) return; + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); + size_t next_index = mi_slice_index(slice) + slice_count; + size_t next_count = slice->slice_count - slice_count; + mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + slice->slice_count = (uint32_t)slice_count; +} + static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up @@ -740,7 +749,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren mi_span_queue_delete(sq, slice); if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, false /* don't decommit */, tld); + mi_segment_slice_split(segment, slice, slice_count, tld); } mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); From e68f2c14796af42782400d4d4f982edbcb4832b9 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 23 Dec 2022 13:02:16 -0800 Subject: [PATCH 86/88] fix recursion issue on exit on windows, #672 --- src/init.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index b5a98403..11c66a67 100644 --- a/src/init.c +++ b/src/init.c @@ -350,7 +350,11 @@ static void _mi_thread_done(mi_heap_t* default_heap); #endif static DWORD mi_fls_key = (DWORD)(-1); static void NTAPI mi_fls_done(PVOID value) { - if (value!=NULL) _mi_thread_done((mi_heap_t*)value); + mi_heap_t* heap = (mi_heap_t*)value; + if (heap != NULL) { + _mi_thread_done(heap); + FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 + } } #elif defined(MI_USE_PTHREADS) // use pthread local storage keys to detect thread ending From 9adb032e9c5941f4fac941bcda234e86b3e4e95a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 23 Dec 2022 13:04:53 -0800 Subject: [PATCH 87/88] test non-default heap --- test/main-override-static.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/main-override-static.c b/test/main-override-static.c index fcdea4d3..bf1cc416 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -42,6 +42,9 @@ int main() { char* s = strdup("hello\n"); free(p2); + mi_heap_t* h = mi_heap_new(); + mi_heap_set_default(h); + p2 = malloc(16); p1 = realloc(p1, 32); free(p1); @@ -58,6 +61,7 @@ int main() { //mi_stats_print(NULL); // test_process_info(); + return 0; } From 4cebb48062196625b91c79311f3ff4b22f493d47 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 23 Dec 2022 13:29:06 -0800 Subject: [PATCH 88/88] bump version to v1.7.9 --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- readme.md | 10 ++++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 97b3f575..dbe8fdaa 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 7) -set(mi_version_patch 8) +set(mi_version_patch 9) set(mi_version 
${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 3e6ba7c3..f5900336 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 178 // major + 2 digits minor +#define MI_MALLOC_VERSION 179 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/readme.md b/readme.md index 40781200..364b974b 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the run-time systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.7` (2022-11-03). -Latest stable tag: `v1.7.7` (2022-11-03). +Latest release tag: `v2.0.9` (2022-12-23). +Latest stable tag: `v1.7.9` (2022-12-23). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -78,6 +78,12 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with asan and improved [Valgrind] support. Support arbitrary large + alignments (in particular for `std::pmr` pools). + Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). + Heap walks now visit all objects (including huge objects). Support Windows Nano Server containers (by Johannes Schindelin, @dscho). + Various small bug fixes. + * 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind] for leak testing and heap block overflow detection. Initial support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, .