From bbd81bbbd1bbe1de30b09dcfc6da22f31c4f5768 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 00:46:45 -0700 Subject: [PATCH 001/352] wip: new segment allocation with flexible large objects --- ide/vs2017/mimalloc-override.vcxproj | 5 +- ide/vs2017/mimalloc-override.vcxproj.filters | 3 - ide/vs2017/mimalloc.vcxproj | 1 - ide/vs2017/mimalloc.vcxproj.filters | 3 - include/mimalloc-internal.h | 47 +- include/mimalloc-types.h | 78 +- src/alloc-aligned.c | 2 +- src/alloc.c | 8 +- src/heap.c | 10 +- src/init.c | 21 +- src/page-queue.c | 18 +- src/page.c | 24 +- src/segment.c | 892 ++++++++++++------- src/stats.c | 8 +- test/main-override-static.c | 162 +++- 15 files changed, 843 insertions(+), 439 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 7d452b55..d9bce9c0 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -95,7 +95,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default @@ -118,7 +118,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default @@ -225,7 +225,6 @@ - diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index df0bf5ed..639a9d4e 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -58,9 +58,6 @@ Source Files - - Source Files - Source Files diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 3e453471..5b151da7 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -227,7 +227,6 @@ - true diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 28d94e99..d32080f5 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -47,9 +47,6 @@ Source Files - - Source Files - Source Files diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f6f2e2ae..f17d8af0 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -39,10 +39,20 @@ bool _mi_preloading(); // true while the C runtime is not ready // os.c size_t _mi_os_page_size(void); +size_t _mi_os_large_page_size(); void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld); + +/* // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld); void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld); @@ -55,6 +65,7 @@ bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); void 
_mi_mem_collect(mi_stats_t* stats); +*/ // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -62,7 +73,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; @@ -233,27 +244,47 @@ static inline mi_segment_t* _mi_ptr_segment(const void* p) { return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); } +static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) { + mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0); + return (mi_page_t*)(s); +} + +static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { + mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0); + return (mi_slice_t*)(p); +} + +static size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + return index; +} + // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]); + mi_assert_internal(segment == NULL || page == mi_slice_to_page(&segment->slices[mi_slice_index(mi_page_to_slice((mi_page_t*)page))])); return segment; } // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); - uintptr_t idx = (uintptr_t)diff >> segment->page_shift; - mi_assert_internal(idx < segment->capacity); - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); - return &((mi_segment_t*)segment)->pages[idx]; + uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; + mi_assert_internal(idx < segment->slice_count); + mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; + mi_slice_t* slice = slice0 - slice0->slice_offset; // adjust to the block that holds the page data + mi_assert_internal(slice->slice_count > slice0->slice_offset); + mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count); + return mi_slice_to_page(slice); } // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page->block_size, page_size); + return _mi_segment_page_start(segment, page, page_size); } // Get the page containing the pointer diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 67ad8516..7e14daca 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -74,27 +74,28 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit -#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb -#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4mb -#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb +#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb + +#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 512kb + // Derived constants -#define MI_SEGMENT_SIZE (1<>MI_INTPTR_SHIFT) -#define MI_HUGE_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` @@ -103,7 +104,7 @@ terms of the MIT license. A copy of the license can be found in the file // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) -#if (MI_LARGE_WSIZE_MAX >= 655360) +#if (MI_MEDIUM_WSIZE_MAX >= 655360) #error "define more bins" #endif @@ -154,20 +155,20 @@ typedef uintptr_t mi_thread_free_t; // - using `uint16_t` does not seem to slow things down typedef struct mi_page_s { // "owned" by the segment - uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` - bool segment_in_use:1; // `true` if the segment allocated this page - bool is_reset:1; // `true` if the page memory was reset - bool is_committed:1; // `true` if the page virtual memory is committed + size_t slice_count; // slices in this page (0 if not a page) + uint16_t slice_offset; // distance from the actual page data slice (0 if a page) + bool is_reset; // `true` if the page memory was reset + bool is_committed; // `true` if the page virtual memory is committed // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed uint16_t reserved; // number of blocks reserved in memory - // 16 bits padding + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #if MI_SECURE uintptr_t cookie; // random cookie to encode the free lists #endif - mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1 + mi_page_flags_t flags; size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) @@ -182,7 +183,7 @@ typedef struct mi_page_s { // improve page index calculation #if (MI_INTPTR_SIZE==8 && MI_SECURE==0) - void* padding[1]; // 12 words on 64-bit + // void* padding[1]; // 12 words on 64-bit #elif MI_INTPTR_SIZE==4 // void* padding[1]; // 12 words on 32-bit #endif @@ -193,30 +194,37 @@ typedef struct mi_page_s { typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64kb pages inside a segment MI_PAGE_MEDIUM, // medium blocks go into 512kb pages inside a segment - MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment - MI_PAGE_HUGE // huge blocks (>512kb) are put into a single page in a segment of the exact size (but still 2mb aligned) + MI_PAGE_LARGE, // larger blocks go into a page of just one block + MI_PAGE_HUGE, // huge blocks (>16mb) are put into a single page in a single segment. 
} mi_page_kind_t; +typedef enum mi_segment_kind_e { + MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. + MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. +} mi_segment_kind_t; + +typedef mi_page_t mi_slice_t; + // Segments are large allocated memory blocks (2mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { struct mi_segment_s* next; struct mi_segment_s* prev; - struct mi_segment_s* abandoned_next; + struct mi_segment_s* abandoned_next; // abandoned segment stack: `used == abandoned` size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t used; // count of pages in use (`used <= capacity`) - size_t capacity; // count of available pages (`#free + used`) + size_t used; // count of pages in use size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` size_t memid; // id for the os-level memory manager + bool all_committed; // layout like this to optimize access in `mi_free` - size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). - volatile uintptr_t thread_id; // unique id of the thread owning this segment - mi_page_kind_t page_kind; // kind of pages: small, large, or huge - mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages + mi_segment_kind_t kind; + uintptr_t thread_id; + size_t slice_count; // slices in this segment (at most MI_SLICES_PER_SEGMENT) + mi_slice_t slices[MI_SLICES_PER_SEGMENT]; } mi_segment_t; @@ -326,13 +334,13 @@ typedef struct mi_stats_s { mi_stat_count_t commit_calls; mi_stat_count_t threads; mi_stat_count_t huge; - mi_stat_count_t giant; + mi_stat_count_t large; mi_stat_count_t malloc; mi_stat_count_t segments_cache; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t huge_count; - mi_stat_counter_t giant_count; + mi_stat_counter_t large_count; #if MI_STAT>1 mi_stat_count_t normal[MI_BIN_HUGE+1]; #endif @@ -367,11 +375,11 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SEGMENT_SIZE) // Segments thread local data typedef struct mi_segments_tld_s { - mi_segment_queue_t small_free; // queue of segments with free small pages - mi_segment_queue_t medium_free; // queue of segments with free medium pages + mi_page_queue_t pages[MI_SEGMENT_BIN_MAX+1]; // free pages inside segments size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 24f6c440..c605d637 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -43,7 +43,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* heap, size_t size, size_t if (p == NULL) return NULL; // .. and align within the allocation - mi_page_set_has_aligned( _mi_ptr_page(p), true ); + mi_page_set_has_aligned(_mi_ptr_page(p), true); uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment); mi_assert_internal(adjust % sizeof(uintptr_t) == 0); void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); diff --git a/src/alloc.c b/src/alloc.c index bbe504a1..b5a48bde 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -226,7 +226,7 @@ void mi_free(void* p) mi_attr_noexcept #endif mi_page_t* const page = _mi_segment_page_of(segment, p); - + #if (MI_STAT>1) mi_heap_t* heap = mi_heap_get_default(); mi_heap_stat_decrease( heap, malloc, mi_usable_size(p)); @@ -235,9 +235,9 @@ void mi_free(void* p) mi_attr_noexcept } // huge page stat is accounted for in `_mi_page_retire` #endif - - const uintptr_t tid = _mi_thread_id(); - if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks + + uintptr_t tid = _mi_thread_id(); + if (mi_likely(page->flags == tid)) { // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; mi_block_set_next(page, block, page->local_free); diff --git a/src/heap.c b/src/heap.c index 768cab96..7b5d7a07 100644 --- a/src/heap.c +++ b/src/heap.c @@ -150,7 +150,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + // _mi_mem_collect(&heap->tld->stats); } } @@ -245,9 +245,9 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); // stats - if (page->block_size > MI_LARGE_SIZE_MAX) { - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + if (page->block_size > MI_MEDIUM_SIZE_MAX) { + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.large,page->block_size); } else { _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); @@ -255,7 +255,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ } #if (MI_STAT>1) size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_LARGE_SIZE_MAX) { + if (page->block_size <= MI_MEDIUM_SIZE_MAX) { mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); } mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... 
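Note on the size classification behind the heap.c hunks above (and the matching page.c hunks below): allocations up to MI_MEDIUM_SIZE_MAX stay in the regular size-class bins, sizes up to MI_LARGE_SIZE_MAX become a one-block "large" page inside a normal segment, and anything beyond that gets its own MI_SEGMENT_HUGE segment. A minimal sketch of that decision follows; the threshold values are illustrative assumptions only (the derived-constant hunk in mimalloc-types.h is partly illegible here, and the 16MiB large limit is taken from the ">16mb" remark in the mi_page_kind_t comment).

// Sketch of the size classification implied by this patch; the thresholds
// passed in below (roughly 128KiB and 16MiB on 64-bit) are assumptions for
// illustration, not the patch's exact MI_MEDIUM_SIZE_MAX / MI_LARGE_SIZE_MAX.
#include <stddef.h>
#include <stdio.h>

#define KiB ((size_t)1024)
#define MiB (1024*KiB)

typedef enum { ALLOC_BINNED, ALLOC_LARGE_PAGE, ALLOC_HUGE_SEGMENT } alloc_path_t;

static alloc_path_t classify(size_t size, size_t medium_max, size_t large_max) {
  if (size <= medium_max) return ALLOC_BINNED;      // small/medium: regular size-class bins
  if (size <= large_max)  return ALLOC_LARGE_PAGE;  // large: one-block page in a normal segment
  return ALLOC_HUGE_SEGMENT;                        // huge: dedicated MI_SEGMENT_HUGE segment
}

int main(void) {
  const size_t sizes[] = { 64, 100*KiB, 1*MiB, 32*MiB };
  for (size_t i = 0; i < sizeof(sizes)/sizeof(sizes[0]); i++) {
    printf("%zu bytes -> path %d\n", sizes[i], (int)classify(sizes[i], 128*KiB, 16*MiB));
  }
  return 0;
}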
diff --git a/src/init.c b/src/init.c index 77ce4aad..ff0fa76c 100644 --- a/src/init.c +++ b/src/init.c @@ -21,7 +21,7 @@ const mi_page_t _mi_page_empty = { NULL, 0, 0, 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && MI_SECURE==0) - , { NULL } + // , { NULL } #endif }; @@ -43,8 +43,8 @@ const mi_page_t _mi_page_empty = { QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ - QNULL(MI_LARGE_WSIZE_MAX + 1 /* 655360, Huge queue */), \ - QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ } + QNULL(MI_MEDIUM_WSIZE_MAX + 1 /* 655360, Huge queue */), \ + QNULL(MI_MEDIUM_WSIZE_MAX + 2) /* Full queue */ } #define MI_STAT_COUNT_NULL() {0,0,0,0} @@ -91,14 +91,23 @@ const mi_heap_t _mi_heap_empty = { mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; +// Empty page queues for every bin +#define MI_SEGMENT_PAGE_QUEUES_EMPTY \ + { QNULL(0), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ + QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ + QNULL( 160), QNULL( 192), QNULL( 224), /* 27 */ } + + #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) static mi_tld_t tld_main = { 0, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { MI_SEGMENT_PAGE_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { diff --git a/src/page-queue.c b/src/page-queue.c index e59620c2..f396e233 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -34,15 +34,15 @@ terms of the MIT license. 
A copy of the license can be found in the file static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_LARGE_SIZE_MAX+sizeof(uintptr_t))); + return (pq->block_size == (MI_MEDIUM_SIZE_MAX+sizeof(uintptr_t))); } static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_LARGE_SIZE_MAX+(2*sizeof(uintptr_t)))); + return (pq->block_size == (MI_MEDIUM_SIZE_MAX+(2*sizeof(uintptr_t)))); } static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { - return (pq->block_size > MI_LARGE_SIZE_MAX); + return (pq->block_size > MI_MEDIUM_SIZE_MAX); } /* ----------------------------------------------------------- @@ -116,7 +116,7 @@ extern inline uint8_t _mi_bin(size_t size) { bin = (uint8_t)wsize; } #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { + else if (wsize > MI_MEDIUM_WSIZE_MAX) { bin = MI_BIN_HUGE; } else { @@ -147,7 +147,7 @@ size_t _mi_bin_size(uint8_t bin) { // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { - if (size <= MI_LARGE_SIZE_MAX) { + if (size <= MI_MEDIUM_SIZE_MAX) { return _mi_bin_size(_mi_bin(size)); } else { @@ -245,7 +245,7 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; @@ -268,7 +268,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ mi_assert_internal(page->heap == NULL); mi_assert_internal(!mi_page_queue_contains(queue, page)); mi_assert_internal(page->block_size == queue->block_size || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); @@ -297,8 +297,8 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || (page->block_size == to->block_size && mi_page_queue_is_full(from)) || (page->block_size == from->block_size && mi_page_queue_is_full(to)) || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_full(to))); + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_full(to))); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; diff --git a/src/page.c b/src/page.c index 9d645b6c..b1fd1e69 100644 --- a/src/page.c +++ b/src/page.c @@ -74,7 +74,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == 
_mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); @@ -102,7 +102,7 @@ bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_MEDIUM_SIZE_MAX || mi_page_is_in_full(page)); mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); } return true; @@ -356,9 +356,9 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_page_set_has_aligned(page, false); // account for huge pages here - if (page->block_size > MI_LARGE_SIZE_MAX) { - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); + if (page->block_size > MI_MEDIUM_SIZE_MAX) { + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_decrease(&page->heap->tld->stats.large, page->block_size); } else { _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); @@ -554,7 +554,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size); + _mi_segment_page_start(segment, page, &page_size); page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); @@ -702,7 +702,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { ----------------------------------------------------------- */ // A huge page is allocated directly without being in a queue -static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { +static mi_page_t* mi_large_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t); mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_queue_t* pq = mi_page_queue(heap,block_size); @@ -711,9 +711,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(page->block_size == block_size); - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.giant, block_size); - _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_increase(&heap->tld->stats.large, block_size); + _mi_stat_counter_increase(&heap->tld->stats.large_count, 1); } else { _mi_stat_increase(&heap->tld->stats.huge, block_size); @@ -744,12 +744,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // huge allocation? 
mi_page_t* page; - if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) { + if (mi_unlikely(size > MI_MEDIUM_SIZE_MAX)) { if (mi_unlikely(size >= (SIZE_MAX - MI_MAX_ALIGN_SIZE))) { page = NULL; } else { - page = mi_huge_page_alloc(heap,size); + page = mi_large_page_alloc(heap,size); } } else { diff --git a/src/segment.c b/src/segment.c index 736345bf..31117857 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,16 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file /* ----------------------------------------------------------- Segment allocation - We allocate pages inside big OS allocated "segments" - (4mb on 64-bit). This is to avoid splitting VMA's on Linux - and reduce fragmentation on other OS's. Each thread - owns its own segments. - - Currently we have: - - small pages (64kb), 64 in one segment - - medium pages (512kb), 8 in one segment - - large pages (4mb), 1 in one segment - - huge blocks > MI_LARGE_SIZE_MAX (512kb) are directly allocated by the OS + In any case the memory for a segment is virtual and only committed on demand (i.e. we are careful to not touch the memory @@ -35,75 +26,103 @@ terms of the MIT license. A copy of the license can be found in the file be reclaimed by still running threads, much like work-stealing. ----------------------------------------------------------- */ - /* ----------------------------------------------------------- - Queue of segments containing free pages + Bins ----------------------------------------------------------- */ - - -#if (MI_DEBUG>1) -static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { - mi_assert_internal(segment != NULL); - mi_segment_t* list = queue->first; - while (list != NULL) { - if (list == segment) break; - mi_assert_internal(list->next==NULL || list->next->prev == list); - mi_assert_internal(list->prev==NULL || list->prev->next == list); - list = list->next; - } - return (list == segment); +// Use bit scan forward to quickly find the first zero bit if it is available +#if defined(_MSC_VER) +#include +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + #if (MI_INTPTR_SIZE==8) + _BitScanReverse64(&idx, x); + #else + _BitScanReverse(&idx, x); + #endif + return idx; } +#elif defined(__GNUC__) || defined(__clang__) +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 
8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); +} +#else +#error "define bsr for your platform" #endif -static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) { - return (queue->first == NULL); +static size_t mi_slice_bin4(size_t slice_count) { + if (slice_count==0) return 0; + mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + size_t s = mi_bsr(slice_count); + if (s <= 1) return slice_count; + size_t bin = ((s << 1) | (slice_count >> (s - 1))&0x01); + return bin; } -static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) { - mi_assert_expensive(mi_segment_queue_contains(queue, segment)); - if (segment->prev != NULL) segment->prev->next = segment->next; - if (segment->next != NULL) segment->next->prev = segment->prev; - if (segment == queue->first) queue->first = segment->next; - if (segment == queue->last) queue->last = segment->prev; - segment->next = NULL; - segment->prev = NULL; +static size_t mi_slice_bin8(size_t slice_count) { + if (slice_count==0) return 0; + mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + size_t s = mi_bsr(slice_count); + if (s <= 2) return slice_count; + size_t bin = ((s << 2) | (slice_count >> (s - 2))&0x03) - 5; + return bin; } -static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) { - mi_assert_expensive(!mi_segment_queue_contains(queue, segment)); - segment->next = NULL; - segment->prev = queue->last; - if (queue->last != NULL) { - mi_assert_internal(queue->last->next == NULL); - queue->last->next = segment; - queue->last = segment; - } - else { - queue->last = queue->first = segment; - } +static size_t mi_slice_bin(size_t slice_count) { + mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); + mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) == MI_SEGMENT_BIN_MAX); + size_t bin = (slice_count==0 ? 0 : mi_slice_bin8(slice_count)); + mi_assert_internal(bin >= 0 && bin <= MI_SEGMENT_BIN_MAX); + return bin; } -static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi_segments_tld_t* tld) { - if (kind == MI_PAGE_SMALL) return &tld->small_free; - else if (kind == MI_PAGE_MEDIUM) return &tld->medium_free; - else return NULL; + +/* ----------------------------------------------------------- + Page Queues +----------------------------------------------------------- */ +static bool mi_page_queue_is_empty(mi_page_queue_t* pq) { + return (pq->first == NULL); } -static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - return mi_segment_free_queue_of_kind(segment->page_kind, tld); +static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) +{ + mi_page_t* page = pq->first; + if (page==NULL) return NULL; + mi_assert_internal(page->prev==NULL); + pq->first = page->next; + if (page->next == NULL) pq->last = NULL; + else page->next->prev = NULL; + page->next = NULL; + page->prev = NULL; // paranoia + page->block_size = 1; // no more free + return page; } -// remove from free queue if it is in one -static void mi_segment_remove_from_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); // may be NULL - bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); - if (in_queue) { - mi_segment_queue_remove(queue, segment); - } +static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { + // todo: or push to the end? 
+ mi_assert_internal(page->prev == NULL && page->next==NULL); + page->prev = NULL; // paranoia + page->next = pq->first; + pq->first = page; + if (page->next != NULL) page->next->prev = page; + else pq->last = page; + page->block_size = 0; // free } -static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_enqueue(mi_segment_free_queue(segment, tld), segment); +static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { + size_t bin = mi_slice_bin(slice_count); + return &tld->pages[bin]; +} + +static void mi_page_queue_remove(mi_page_queue_t* pq, mi_page_t* page) { + mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); + if (page->prev != NULL) page->prev->next = page->next; + else pq->first = page->next; + if (page->next != NULL) page->next->prev = page->prev; + else pq->last = page->prev; + page->prev = NULL; + page->next = NULL; + page->block_size = 1; // no more free } @@ -112,31 +131,47 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t ----------------------------------------------------------- */ #if (MI_DEBUG > 1) -static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); - bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); - if (in_queue) { - mi_assert_expensive(mi_segment_queue_contains(queue, segment)); +static bool mi_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { + for (mi_page_t* p = pq->first; p != NULL; p = p->next) { + if (p==page) return true; } - return in_queue; + return false; } -static size_t mi_segment_pagesize(mi_segment_t* segment) { - return ((size_t)1 << segment->page_shift); -} -static bool mi_segment_is_valid(mi_segment_t* segment) { +static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->abandoned <= segment->used); - size_t nfree = 0; - for (size_t i = 0; i < segment->capacity; i++) { - if (!segment->pages[i].segment_in_use) nfree++; + mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); + //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); + mi_slice_t* slice = &segment->slices[0]; + size_t page_count = 0; + mi_page_queue_t* pq; + while(slice < &segment->slices[segment->slice_count]) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (slice->block_size > 0) { // a page in use, all slices need their back offset set + page_count++; + for (size_t i = 0; i < slice->slice_count; i++) { + mi_assert_internal((slice+i)->slice_offset == i); + mi_assert_internal(i==0 || (slice+i)->slice_count == 0); + mi_assert_internal(i==0 || (slice+i)->block_size == 1); + } + } + else { // free range of slices; only last slice needs a valid back offset + mi_slice_t* end = slice + slice->slice_count - 1; + mi_assert_internal(slice == end - end->slice_offset); + mi_assert_internal(slice == end || end->slice_count == 0 ); + mi_assert_internal(end->block_size == 0); + if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { + pq = mi_page_queue_for(slice->slice_count,tld); + mi_assert_internal(mi_page_queue_contains(pq,mi_slice_to_page(slice))); + 
} + } + slice = slice + slice->slice_count; } - mi_assert_internal(nfree + segment->used == segment->capacity); - mi_assert_internal(segment->thread_id == _mi_thread_id()); // or 0 - mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || - (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + mi_assert_internal(slice == &segment->slices[segment->slice_count]); + mi_assert_internal(page_count == segment->used + 1); return true; } #endif @@ -145,28 +180,32 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { Segment size calculations ----------------------------------------------------------- */ -// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) +// Start of the page available memory; can be used on uninitialized pages +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); - uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; - - if (page->segment_idx == 0) { - // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; - psize -= segment->segment_info_size; - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { - size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - } - mi_assert_internal((uintptr_t)p % block_size == 0); - } + mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); + ptrdiff_t idx = slice - segment->slices; + size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); + /* + if (idx == 0) { + // the first page starts after the segment info (and possible guard page) + p += segment->segment_info_size; + psize -= segment->segment_info_size; + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + // to ensure this, we over-estimate and align with the OS page size + const size_t asize = _mi_os_page_size(); + uint8_t* q = (uint8_t*)_mi_align_up((uintptr_t)p, _mi_os_page_size()); + if (p < q) { + psize -= (q - p); + p = q; + } + mi_assert_internal((uintptr_t)p % _mi_os_page_size() == 0); } + */ + long secure = mi_option_get(mi_option_secure); - if (secure > 1 || (secure == 1 && page->segment_idx == segment->capacity - 1)) { + if (secure > 1 || (secure == 1 && slice == &segment->slices[segment->slice_count - 1])) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page psize -= _mi_os_page_size(); @@ -178,34 +217,23 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { - /* - if (mi_option_is_enabled(mi_option_secure)) { - // always reserve maximally so the protection falls on - // the same address area, as we need to reuse them from the caches interchangably. 
- capacity = MI_SMALL_PAGES_PER_SEGMENT; - } - */ - size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; +static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_size) { + size_t page_size = _mi_os_page_size(); + size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; - size_t isize = 0; - - if (!mi_option_is_enabled(mi_option_secure)) { - // normally no guard pages - isize = _mi_align_up(minsize, (16 > MI_MAX_ALIGN_SIZE ? 16 : MI_MAX_ALIGN_SIZE)); - } - else { + + if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) - size_t page_size = _mi_os_page_size(); - isize = _mi_align_up(minsize, page_size); - guardsize = page_size; - required = _mi_align_up(required, page_size); + guardsize = page_size; + required = _mi_align_up(required, page_size); } ; if (info_size != NULL) *info_size = isize; - if (pre_size != NULL) *pre_size = isize + guardsize; - return (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_PAGE_HUGE_ALIGN) ); + if (pre_size != NULL) *pre_size = isize + guardsize; + size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_SEGMENT_SLICE_SIZE) ); + mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); + return segment_size; } @@ -229,15 +257,15 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se segment->thread_id = 0; mi_segments_track_size(-((long)segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { - _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + _mi_os_free(segment, segment_size, /*segment->memid,*/ tld->stats); } // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -// and no more than 4. -#define MI_SEGMENT_CACHE_MAX (4) +// and no more than 1. 
+#define MI_SEGMENT_CACHE_MAX (1) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -270,14 +298,13 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { } static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); } segment->next = tld->cache; tld->cache = segment; @@ -297,64 +324,119 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } +/* ----------------------------------------------------------- + Slices +----------------------------------------------------------- */ + + +static uint8_t* mi_slice_start(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + return ((uint8_t*)segment + (mi_slice_index(slice)*MI_SEGMENT_SLICE_SIZE)); +} + +static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { + return &segment->slices[segment->slice_count-1]; +} + +/* ----------------------------------------------------------- + Page management +----------------------------------------------------------- */ + + +static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_index >= 0 && slice_index < segment->slice_count); + size_t bin = mi_slice_bin(slice_count); + if (slice_count==0) slice_count = 1; + mi_assert_internal(slice_count >= 0 && slice_index + slice_count - 1 < segment->slice_count); + + // set first and last slice (the intermediates can be undetermined) + mi_slice_t* slice = &segment->slices[slice_index]; + slice->slice_count = slice_count; + slice->slice_offset = 0; + if (slice_count > 1) { + mi_slice_t* end = &segment->slices[slice_index + slice_count - 1]; + end->slice_count = 0; + end->slice_offset = (uint16_t)slice_count - 1; + end->block_size = 0; + } + // and push it on the free page queue + mi_page_queue_push( &tld->pages[bin], mi_slice_to_page(slice) ); +} + +static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); + size_t slice_index = mi_slice_index(mi_page_to_slice(page)); + mi_segment_page_init(segment,slice_index,page->slice_count,tld); +} + + +static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(page->slice_count >= slice_count); + mi_assert_internal(page->block_size > 0); // no more in free queue + if (page->slice_count <= slice_count) return; + mi_segment_t* segment = _mi_page_segment(page); + size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; + size_t next_count = page->slice_count - slice_count; + mi_segment_page_init( segment, next_index, next_count, tld ); + page->slice_count = slice_count; +} + +static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { + // search from best fit up + mi_page_queue_t* pq = 
mi_page_queue_for(slice_count,tld); + if (slice_count == 0) slice_count = 1; + while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX] && mi_page_queue_is_empty(pq)) { + pq++; + } + if (pq > &tld->pages[MI_SEGMENT_BIN_MAX]) { + // could not find a page.. + return NULL; + } + + // pop the page and split to the right size + mi_page_t* page = mi_page_queue_pop(pq); + mi_assert_internal(page != NULL && page->slice_count >= slice_count && page->slice_offset == 0); + if (page->slice_count > slice_count) { + mi_segment_page_split(page, slice_count, tld); + } + mi_assert_internal(page != NULL && page->slice_count == slice_count); + return page; +} + +static void mi_segment_page_remove(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); + mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); + mi_page_queue_remove(pq, mi_slice_to_page(slice)); +} + + /* ----------------------------------------------------------- Segment allocation ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // calculate needed sizes first - size_t capacity; - if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); - capacity = 1; - } - else { - mi_assert_internal(required == 0); - size_t page_size = (size_t)1 << page_shift; - capacity = MI_SEGMENT_SIZE / page_size; - mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); - mi_assert_internal(capacity >= 1 && capacity <= MI_SMALL_PAGES_PER_SEGMENT); - } size_t info_size; size_t pre_size; - size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); + size_t segment_size = mi_segment_size(required, &pre_size, &info_size); + size_t slice_count = segment_size / MI_SEGMENT_SLICE_SIZE; mi_assert_internal(segment_size >= required); - size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); + //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); - bool protection_still_good = false; + bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) + || required > 0; // huge page mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); - if (segment != NULL) { - if (mi_option_is_enabled(mi_option_secure)) { - if (segment->page_kind != page_kind) { - _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs - } - else { - protection_still_good = true; // otherwise, the guard pages are still in place - } - } - if (!mi_option_is_enabled(mi_option_eager_commit)) { - if (page_kind > MI_PAGE_MEDIUM) { - _mi_mem_commit(segment, segment->segment_size, tld->stats); - } - else { - // ok, commit (and unreset) on demand again - } - } - else if (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset)) { - _mi_mem_unreset(segment, segment->segment_size, tld->stats); - } - } - else { + if (segment==NULL) { // Allocate the segment from the OS - size_t memid; - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &memid, os_tld); + size_t memid = 0; + segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, /* &memid,*/ os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { - _mi_mem_commit(segment, info_size, tld->stats); + _mi_os_commit(segment, info_size, tld->stats); } segment->memid = memid; mi_segments_track_size((long)segment_size, tld); @@ -367,65 +449,73 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->memid = memid; } - if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) { + if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data - mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); + mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); + _mi_os_protect((uint8_t*)segment + info_size, (pre_size - info_size)); size_t os_page_size = _mi_os_page_size(); - if (mi_option_get(mi_option_secure) <= 1) { - // and protect the last page too - _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); + // and protect the last page too + _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + slice_count--; // don't use the last slice :-( + } + + // initialize segment info + segment->segment_size = segment_size; + segment->segment_info_size = pre_size; + segment->thread_id = _mi_thread_id(); + segment->cookie = _mi_ptr_cookie(segment); + segment->slice_count = slice_count; + segment->all_committed = commit; + segment->kind = (required == 0 ? 
MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); + _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + + // reserve first slices for segment info + size_t islice_count = (segment->segment_info_size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; + for (size_t i = 0; i < islice_count; i++) { + mi_slice_t* slice = &segment->slices[i]; + if (i==0) { + slice->slice_count = islice_count; + slice->block_size = islice_count * MI_SEGMENT_SLICE_SIZE; } else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); - } + slice->slice_offset = (uint16_t)i; + slice->block_size = 1; } } - segment->page_kind = page_kind; - segment->capacity = capacity; - segment->page_shift = page_shift; - segment->segment_size = segment_size; - segment->segment_info_size = pre_size; - segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); - for (uint8_t i = 0; i < segment->capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; + // initialize initial free pages + if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page + mi_segment_page_init(segment, islice_count, segment->slice_count - islice_count, tld); } - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); - //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); - //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); - mi_assert(segment != NULL); - mi_segment_remove_from_free_queue(segment,tld); + mi_assert_internal(segment != NULL); + mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->prev == NULL); + mi_assert_internal(segment->used == 0); - mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); - mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); - mi_assert(segment->next == NULL); - mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - - // update reset memory statistics - /* - for (uint8_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - page->is_reset = false; - mi_stat_decrease( tld->stats->reset,mi_page_size(page)); + // Remove the free pages + mi_slice_t* slice = &segment->slices[0]; + size_t page_count = 0; + while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. 
+ if (slice->block_size == 0) { + mi_segment_page_remove(slice, tld); } + page_count++; + slice = slice + slice->slice_count; } - */ + mi_assert_internal(page_count == 2); // first page is allocated by the segment itself + // stats + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -436,100 +526,143 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } /* ----------------------------------------------------------- - Free page management inside a segment + Page allocation ----------------------------------------------------------- */ +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(required <= MI_LARGE_SIZE_MAX && page_kind <= MI_PAGE_LARGE); -static bool mi_segment_has_free(const mi_segment_t* segment) { - return (segment->used < segment->capacity); -} + // find a free page + size_t page_size = _mi_align_up(required,MI_SEGMENT_SLICE_SIZE); + size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; + mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + if (page==NULL) { + // no free page, allocate a new segment and try again + if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM + return mi_segment_page_alloc(page_kind, required, tld, os_tld); + } + mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_assert_expensive(mi_segment_is_valid(segment)); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - if (page->is_reset || !page->is_committed) { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - mi_assert_internal(!(page->is_reset && !page->is_committed)); - if (!page->is_committed) { - page->is_committed = true; - _mi_mem_commit(start,psize,stats); - } - if (page->is_reset) { - page->is_reset = false; - _mi_mem_unreset(start, psize, stats); - } - } - return page; + // set slice back pointers and commit/unreset + mi_segment_t* segment = _mi_page_segment(page); + mi_slice_t* slice = mi_page_to_slice(page); + bool commit = false; + bool unreset = false; + for (size_t i = 0; i < page->slice_count; i++, slice++) { + slice->slice_offset = (uint16_t)i; + slice->block_size = 1; + if (i > 0) slice->slice_count = 0; + if (!segment->all_committed && !slice->is_committed) { + slice->is_committed = true; + commit = true; + } + if (slice->is_reset) { + slice->is_reset = false; + unreset = true; } } - mi_assert(false); - return NULL; + uint8_t* page_start = mi_slice_start(mi_page_to_slice(page)); + if(commit) { _mi_os_commit(page_start, page_size, tld->stats); } + if(unreset){ _mi_os_unreset(page_start, page_size, tld->stats); } + + // initialize the page and return + mi_assert_internal(segment->thread_id == _mi_thread_id()); + segment->used++; + mi_page_init_flags(page, segment->thread_id); + return page; +} + +static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page != NULL && page->slice_count > 0 && page->slice_offset == 0 && page->block_size > 0); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(segment->used > 0); + 
segment->used--; + + // free and coalesce the page + mi_slice_t* slice = mi_page_to_slice(page); + size_t slice_count = slice->slice_count; + mi_slice_t* next = slice + slice->slice_count; + mi_assert_internal(next <= mi_segment_last_slice(segment) + 1); + if (next <= mi_segment_last_slice(segment) && next->block_size==0) { + // free next block -- remove it from free and merge + mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); + slice_count += next->slice_count; // extend + mi_segment_page_remove(next, tld); + } + if (slice > segment->slices) { + mi_slice_t* prev = slice - 1; + prev = prev - prev->slice_offset; + mi_assert_internal(prev >= segment->slices); + if (prev->block_size==0) { + // free previous slice -- remove it from free and merge + mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); + slice_count += prev->slice_count; + mi_segment_page_remove(prev, tld); + slice = prev; + } + } + + // and add the new free page + mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); } /* ----------------------------------------------------------- - Free + Page Free ----------------------------------------------------------- */ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); - mi_assert_internal(page->segment_in_use); +static void mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page->block_size > 0); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(page->is_committed); + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment->all_committed || page->is_committed); size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? 
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_mem_reset(start, psize, stats); + _mi_os_reset(start, psize, tld->stats); } // zero the page data - uint8_t idx = page->segment_idx; // don't clear the index - bool is_reset = page->is_reset; // don't clear the reset flag + size_t slice_count = page->slice_count; // don't clear the slice_count + bool is_reset = page->is_reset; // don't clear the reset flag bool is_committed = page->is_committed; // don't clear the commit flag memset(page, 0, sizeof(*page)); - page->segment_idx = idx; - page->segment_in_use = false; + page->slice_count = slice_count; page->is_reset = is_reset; page->is_committed = is_committed; - segment->used--; + page->block_size = 1; + + // and free it + mi_segment_page_free_coalesce(page, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); // mark it as free now - mi_segment_page_clear(segment, page, tld->stats); + mi_segment_page_clear(page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment mi_segment_free(segment, force, tld); } - else { - if (segment->used == segment->abandoned) { - // only abandoned pages; remove from free list and abandon - mi_segment_abandon(segment,tld); - } - else if (segment->used + 1 == segment->capacity) { - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // for now we only support small and medium pages - // move back to segments free list - mi_segment_insert_in_free_queue(segment,tld); - } - } + else if (segment->used == segment->abandoned) { + // only abandoned pages; remove from free list and abandon + mi_segment_abandon(segment,tld); + } } @@ -548,10 +681,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); - // remove the segment from the free page queue if needed - mi_segment_remove_from_free_queue(segment,tld); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + // all pages in the segment are abandoned; add it to the abandoned list segment->thread_id = 0; do { @@ -565,7 +696,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); @@ -583,7 +714,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } else { atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) - if (atmost < 8) atmost = 8; // but at least 8 + if (atmost < 2) atmost = 2; // but at least 2 } // for `atmost` `reclaimed` abandoned segments... @@ -597,42 +728,44 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen // got it. 
mi_atomic_decrement(&abandoned_count); - segment->thread_id = _mi_thread_id(); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned_next = NULL; mi_segments_track_size((long)segment->segment_size,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); _mi_stat_decrease(&tld->stats->segments_abandoned,1); - // add its abandoned pages to the current thread - mi_assert(segment->abandoned == segment->used); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->segment_in_use) { + mi_slice_t* slice = &segment->slices[0]; + while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + mi_page_t* page = mi_slice_to_page(slice); + slice = slice + slice->slice_count; + if (page->block_size > 0) { // a page in use segment->abandoned--; - mi_assert(page->next == NULL); + mi_assert_internal(page->next == NULL && page->prev==NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(segment,page,tld->stats); + mi_segment_page_clear(page, tld); } else { // otherwise reclaim it - mi_page_init_flags(page,segment->thread_id); - _mi_page_reclaim(heap,page); + mi_page_init_flags(page, segment->thread_id); + _mi_page_reclaim(heap, page); } } + else { // free range of slices; add to the free pages + mi_segment_page_add_free(page,tld); + } } + mi_assert(segment->abandoned == 0); + segment->thread_id = _mi_thread_id(); // only now for valid checks if (segment->used == 0) { // due to page_clear mi_segment_free(segment,false,tld); } else { - reclaimed++; - // add its free pages to the the current thread free small segment queue - if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { - mi_segment_insert_in_free_queue(segment,tld); - } + reclaimed++; } } return (reclaimed>0); @@ -643,64 +776,16 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen Small page allocation ----------------------------------------------------------- */ -// Allocate a small page inside a segment. 
-// Requires that the page has free pages -static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld->stats); - page->segment_in_use = true; - mi_page_init_flags(page,segment->thread_id); - segment->used++; - mi_assert_internal(segment->used <= segment->capacity); - if (segment->used == segment->capacity) { - // if no more free pages, remove from the queue - mi_assert_internal(!mi_segment_has_free(segment)); - mi_segment_remove_from_free_queue(segment,tld); - } - return page; -} - -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld); - if (mi_segment_queue_is_empty(free_queue)) { - mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld); - if (segment == NULL) return NULL; - mi_segment_enqueue(free_queue, segment); - } - mi_assert_internal(free_queue->first != NULL); - return mi_segment_page_alloc_in(free_queue->first,tld); -} - -static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); -} - -static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); -} - -/* ----------------------------------------------------------- - large page allocation ------------------------------------------------------------ */ - -static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; - segment->used = 1; - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; - mi_page_init_flags(page,segment->thread_id); - return page; -} - static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); + mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld); if (segment == NULL) return NULL; mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); segment->used = 1; - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_slice_to_page(&segment->slices[0]); + page->slice_count = segment->slice_count; + page->slice_offset = 0; + page->block_size = size; mi_page_init_flags(page,segment->thread_id); return page; } @@ -708,25 +793,144 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld /* ----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ +/* static bool mi_is_good_fit(size_t bsize, size_t size) { // good fit if no more than 25% wasted return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4)); } +*/ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { - page = mi_segment_small_page_alloc(tld,os_tld); + if (block_size <= MI_SMALL_SIZE_MAX) {// || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { + page = 
mi_segment_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld); } - else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { - page = mi_segment_medium_page_alloc(tld, os_tld); + else if (block_size <= MI_MEDIUM_SIZE_MAX) {// || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { + page = mi_segment_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld); } - else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) { - page = mi_segment_large_page_alloc(tld, os_tld); + else if (block_size <= MI_LARGE_SIZE_MAX) { + page = mi_segment_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld); } else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); return page; } + + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (128mb) + set to 1 if it contains the segment meta data. +----------------------------------------------------------- */ + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)1 << 44) // 16TB +#else +#define MI_MAX_ADDRESS ((size_t)1 << 31) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 1KiB per TB with 128MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB? + uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; + *bitidx = segindex % (8*MI_INTPTR_SIZE); + return (segindex / (8*MI_INTPTR_SIZE)); +} + +static void mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + if (index==0) return; + uintptr_t mask; + uintptr_t newmask; + do { + mask = mi_segment_map[index]; + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_compare_exchange(&mi_segment_map[index], newmask, mask)); +} + +static void mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + if (index == 0) return; + uintptr_t mask; + uintptr_t newmask; + do { + mask = mi_segment_map[index]; + newmask = (mask & ~((uintptr_t)1 << bitidx)); + } while (!mi_atomic_compare_exchange(&mi_segment_map[index], newmask, mask)); +} + +// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
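+// (Descriptive note on the lookup below, derived from the code that follows:
+//  mi_segment_map_index_of computes the map word and bit position of the segment-aligned
+//  address; if that bit is set the pointer lies in a segment we allocated. For interior
+//  pointers into large/huge spans the aligned bit may be clear, so the code scans downward
+//  to the nearest set bit and then validates the candidate segment by its cookie and size.)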
+static mi_segment_t* _mi_segment_of(const void* p) {
+  mi_segment_t* segment = _mi_ptr_segment(p);
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  // fast path: for any pointer to valid small/medium/large object or first 4MiB in huge
+  if (mi_likely((mi_segment_map[index] & ((uintptr_t)1 << bitidx)) != 0)) {
+    return segment; // yes, allocated by us
+  }
+  if (index==0) return NULL;
+  // search downwards for the first segment in case it is an interior pointer
+  // could be slow but searches in 256MiB steps through valid huge objects
+  // note: we could maintain a lowest index to speed up the path for invalid pointers?
+  size_t lobitidx;
+  size_t loindex;
+  uintptr_t lobits = mi_segment_map[index] & (((uintptr_t)1 << bitidx) - 1);
+  if (lobits != 0) {
+    loindex = index;
+    lobitidx = _mi_bsr(lobits);
+  }
+  else {
+    loindex = index - 1;
+    while (loindex > 0 && mi_segment_map[loindex] == 0) loindex--;
+    if (loindex==0) return NULL;
+    lobitidx = _mi_bsr(mi_segment_map[loindex]);
+  }
+  // take difference as the addresses could be larger than the MAX_ADDRESS space.
+  size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
+  segment = (mi_segment_t*)((uint8_t*)segment - diff);
+
+  if (segment == NULL) return NULL;
+  mi_assert_internal((void*)segment < p);
+  bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
+  mi_assert_internal(cookie_ok);
+  if (mi_unlikely(!cookie_ok)) return NULL;
+  if (((uint8_t*)segment + segment->segment_size) <= (uint8_t*)p) return NULL; // outside the range
+  mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + segment->segment_size);
+  return segment;
+}
+
+// Is this a valid pointer in our heap?
+static bool mi_is_valid_pointer(const void* p) {
+  return (_mi_segment_of(p) != NULL);
+}
+
+// Return the full segment range belonging to a pointer
+static void* mi_segment_range_of(const void* p, size_t* size) {
+  mi_segment_t* segment = _mi_segment_of(p);
+  if (segment == NULL) {
+    if (size != NULL) *size = 0;
+    return NULL;
+  }
+  else {
+    if (size != NULL) *size = segment->segment_size;
+    return segment;
+  }
+}
+
+bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+  return mi_is_valid_pointer(p);
+}
+
diff --git a/src/stats.c b/src/stats.c
index e7d398b2..a9a022fb 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -106,11 +106,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
   mi_stat_add(&stats->malloc, &src->malloc, 1);
   mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
   mi_stat_add(&stats->huge, &src->huge, 1);
-  mi_stat_add(&stats->giant, &src->giant, 1);
+  mi_stat_add(&stats->large, &src->large, 1);
   mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
   mi_stat_counter_add(&stats->searches, &src->searches, 1);
   mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
-  mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1);
+  mi_stat_counter_add(&stats->large_count, &src->large_count, 1);
 #if MI_STAT>1
   for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
     if (src->normal[i].allocated > 0 || src->normal[i].freed > 0) {
@@ -232,11 +232,11 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
   mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out);
   mi_stat_print(&normal, "normal", 1, out);
   mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ?
1 : -(stats->huge.allocated / stats->huge_count.count)), out); - mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out); + mi_stat_print(&stats->large, "giant", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &normal, 1); mi_stat_add(&total, &stats->huge, 1); - mi_stat_add(&total, &stats->giant, 1); + mi_stat_add(&total, &stats->large, 1); mi_stat_print(&total, "total", 1, out); _mi_fprintf(out, "malloc requested: "); mi_print_amount(stats->malloc.allocated, 1, out); diff --git a/test/main-override-static.c b/test/main-override-static.c index 6ddf4f37..7f20268a 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -6,8 +6,168 @@ #include #include // redefines malloc etc. +#include +#include + +#define MI_INTPTR_SIZE 8 +#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) + +#define MI_BIN_HUGE 100 +//#define MI_ALIGN2W + +// Bit scan reverse: return the index of the highest bit. +static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; + #if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); + #elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); + #else + # error "define bsr for non-32 or 64-bit platforms" + #endif +} + +static inline size_t _mi_wsize_from_size(size_t size) { + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +extern inline uint8_t _mi_bin8(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + #if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } + #endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + #if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes + #endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
+ // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + +extern inline uint8_t _mi_bin4(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + #if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } + #endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + } + return bin; +} + +size_t _mi_binx4(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 1) return bsize; + size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); + return bin; +} + +size_t _mi_binx8(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 2) return bsize; + size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; + return bin; +} + +void mi_bins() { + //printf(" QNULL(1), /* 0 */ \\\n "); + size_t last_bin = 1; + for (size_t bsize = 0; bsize < 8*1024; bsize++) { + size_t size = bsize * 64 * 1024; + size_t bin = _mi_binx8(bsize); + if (bin != last_bin) { + printf("bsize: %6zd, size: %6zd, bin: %6zd\n", bsize, size, bin); + //printf("QNULL(%6zd), ", wsize); + //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + last_bin = bin; + } + } +} + int main() { mi_version(); + mi_bins(); void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -25,7 +185,7 @@ int main() { //p1 = mi_malloc(32); //free(p1); //p2 = malloc(32); - //mi_free(p2); + //mi_free(p2); mi_stats_print(NULL); return 0; } From f2bafbc57f0604c74bf47fbd105d16a7bb951bcc Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 11:49:56 -0700 Subject: [PATCH 002/352] wip: new segment allocation --- CMakeLists.txt | 1 - include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 2 +- src/heap.c | 2 +- src/init.c | 13 +-- src/page.c | 2 +- src/segment.c | 156 ++++++++++++++++++++++-------------- src/static.c | 2 +- src/stats.c | 2 +- test/main-override-static.c | 13 ++- 10 files changed, 116 insertions(+), 79 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 39a671a0..a5be39b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,6 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c - src/memory.c src/segment.c src/page.c src/alloc.c diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f17d8af0..e8fa1ba1 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -271,7 +271,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; mi_assert_internal(idx < segment->slice_count); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 7e14daca..f4042e60 100644 --- 
a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -375,7 +375,7 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; -#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SEGMENT_SIZE) +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) // Segments thread local data typedef struct mi_segments_tld_s { diff --git a/src/heap.c b/src/heap.c index 7b5d7a07..69084731 100644 --- a/src/heap.c +++ b/src/heap.c @@ -255,7 +255,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ } #if (MI_STAT>1) size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_MEDIUM_SIZE_MAX) { + if (page->block_size <= MI_LARGE_SIZE_MAX) { mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); } mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... diff --git a/src/init.c b/src/init.c index ff0fa76c..d48c1a30 100644 --- a/src/init.c +++ b/src/init.c @@ -92,13 +92,14 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; // Empty page queues for every bin +#define SQNULL(sz) { NULL, NULL, sz } #define MI_SEGMENT_PAGE_QUEUES_EMPTY \ - { QNULL(0), \ - QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ - QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ - QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ - QNULL( 160), QNULL( 192), QNULL( 224), /* 27 */ } - + { SQNULL(1), \ + SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ + SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ + SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ + SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ + SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) diff --git a/src/page.c b/src/page.c index b1fd1e69..17dd98fb 100644 --- a/src/page.c +++ b/src/page.c @@ -99,7 +99,7 @@ bool _mi_page_is_valid(mi_page_t* page) { #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == page->heap->thread_id); mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_MEDIUM_SIZE_MAX || mi_page_is_in_full(page)); diff --git a/src/segment.c b/src/segment.c index 31117857..b70dc664 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,6 +13,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) +static void mi_segment_map_allocated_at(const mi_segment_t* segment); +static void mi_segment_map_freed_at(const mi_segment_t* segment); + /* ----------------------------------------------------------- Segment allocation @@ -50,21 +53,13 @@ static inline size_t mi_bsr(uintptr_t x) { #error "define bsr for your platform" #endif -static size_t mi_slice_bin4(size_t slice_count) { - if (slice_count==0) return 0; - mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); - size_t s = mi_bsr(slice_count); - if (s <= 1) return slice_count; - size_t bin = ((s << 1) | (slice_count >> (s - 1))&0x01); - return bin; -} - static size_t mi_slice_bin8(size_t slice_count) { - if (slice_count==0) return 0; + if (slice_count<=1) return slice_count; mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + slice_count--; size_t s = mi_bsr(slice_count); - if (s <= 2) return slice_count; - size_t bin = ((s << 2) | (slice_count >> (s - 2))&0x03) - 5; + if (s <= 2) return slice_count + 1; + size_t bin = ((s << 2) | ((slice_count >> (s - 2))&0x03)) - 4; return bin; } @@ -72,7 +67,7 @@ static size_t mi_slice_bin(size_t slice_count) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) == MI_SEGMENT_BIN_MAX); size_t bin = (slice_count==0 ? 0 : mi_slice_bin8(slice_count)); - mi_assert_internal(bin >= 0 && bin <= MI_SEGMENT_BIN_MAX); + mi_assert_internal(bin <= MI_SEGMENT_BIN_MAX); return bin; } @@ -80,6 +75,7 @@ static size_t mi_slice_bin(size_t slice_count) { /* ----------------------------------------------------------- Page Queues ----------------------------------------------------------- */ +/* static bool mi_page_queue_is_empty(mi_page_queue_t* pq) { return (pq->first == NULL); } @@ -97,6 +93,7 @@ static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) page->block_size = 1; // no more free return page; } +*/ static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { // todo: or push to the end? 
@@ -111,15 +108,18 @@ static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { size_t bin = mi_slice_bin(slice_count); - return &tld->pages[bin]; + mi_page_queue_t* pq = &tld->pages[bin]; + // mi_assert_internal(pq->block_size >= slice_count); + return pq; } -static void mi_page_queue_remove(mi_page_queue_t* pq, mi_page_t* page) { +static void mi_page_queue_delete(mi_page_queue_t* pq, mi_page_t* page) { mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); + // should work too if the queue does not contain page (which can happen during reclaim) if (page->prev != NULL) page->prev->next = page->next; - else pq->first = page->next; + if (page == pq->first) pq->first = page->next; if (page->next != NULL) page->next->prev = page->prev; - else pq->last = page->prev; + if (page == pq->last) pq->last = page->prev; page->prev = NULL; page->next = NULL; page->block_size = 1; // no more free @@ -145,13 +145,13 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; - size_t page_count = 0; + size_t used_count = 0; mi_page_queue_t* pq; while(slice < &segment->slices[segment->slice_count]) { mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(slice->slice_offset == 0); if (slice->block_size > 0) { // a page in use, all slices need their back offset set - page_count++; + used_count++; for (size_t i = 0; i < slice->slice_count; i++) { mi_assert_internal((slice+i)->slice_offset == i); mi_assert_internal(i==0 || (slice+i)->slice_count == 0); @@ -171,7 +171,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } mi_assert_internal(slice == &segment->slices[segment->slice_count]); - mi_assert_internal(page_count == segment->used + 1); + mi_assert_internal(used_count == segment->used + 1); return true; } #endif @@ -255,6 +255,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { segment->thread_id = 0; + mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set @@ -265,7 +266,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, // and no more than 1. 
-#define MI_SEGMENT_CACHE_MAX (1) +#define MI_SEGMENT_CACHE_MAX (2) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -344,10 +345,10 @@ static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(slice_index >= 0 && slice_index < segment->slice_count); - size_t bin = mi_slice_bin(slice_count); + mi_assert_internal(slice_index < segment->slice_count); + mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); if (slice_count==0) slice_count = 1; - mi_assert_internal(slice_count >= 0 && slice_index + slice_count - 1 < segment->slice_count); + mi_assert_internal(slice_index + slice_count - 1 < segment->slice_count); // set first and last slice (the intermediates can be undetermined) mi_slice_t* slice = &segment->slices[slice_index]; @@ -360,7 +361,7 @@ static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size end->block_size = 0; } // and push it on the free page queue - mi_page_queue_push( &tld->pages[bin], mi_slice_to_page(slice) ); + mi_page_queue_push( pq, mi_slice_to_page(slice) ); } static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { @@ -368,6 +369,7 @@ static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); size_t slice_index = mi_slice_index(mi_page_to_slice(page)); mi_segment_page_init(segment,slice_index,page->slice_count,tld); + } @@ -386,28 +388,28 @@ static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tl // search from best fit up mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); if (slice_count == 0) slice_count = 1; - while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX] && mi_page_queue_is_empty(pq)) { + while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX]) { + for( mi_page_t* page = pq->first; page != NULL; page = page->next) { + if (page->slice_count >= slice_count) { + // found one + mi_page_queue_delete(pq,page); + if (page->slice_count > slice_count) { + mi_segment_page_split(page,slice_count,tld); + } + mi_assert_internal(page != NULL && page->slice_count == slice_count); + return page; + } + } pq++; } - if (pq > &tld->pages[MI_SEGMENT_BIN_MAX]) { - // could not find a page.. - return NULL; - } - - // pop the page and split to the right size - mi_page_t* page = mi_page_queue_pop(pq); - mi_assert_internal(page != NULL && page->slice_count >= slice_count && page->slice_offset == 0); - if (page->slice_count > slice_count) { - mi_segment_page_split(page, slice_count, tld); - } - mi_assert_internal(page != NULL && page->slice_count == slice_count); - return page; + // could not find a page.. 
+ return NULL; } -static void mi_segment_page_remove(mi_slice_t* slice, mi_segments_tld_t* tld) { +static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); - mi_page_queue_remove(pq, mi_slice_to_page(slice)); + mi_page_queue_delete(pq, mi_slice_to_page(slice)); } @@ -440,6 +442,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m } segment->memid = memid; mi_segments_track_size((long)segment_size, tld); + mi_segment_map_allocated_at(segment); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); @@ -506,7 +509,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. if (slice->block_size == 0) { - mi_segment_page_remove(slice, tld); + mi_segment_page_delete(slice, tld); } page_count++; slice = slice + slice->slice_count; @@ -573,7 +576,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require return page; } -static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { +static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page != NULL && page->slice_count > 0 && page->slice_offset == 0 && page->block_size > 0); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->used > 0); @@ -588,7 +591,7 @@ static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tl // free next block -- remove it from free and merge mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); slice_count += next->slice_count; // extend - mi_segment_page_remove(next, tld); + mi_segment_page_delete(next, tld); } if (slice > segment->slices) { mi_slice_t* prev = slice - 1; @@ -598,7 +601,7 @@ static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tl // free previous slice -- remove it from free and merge mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); slice_count += prev->slice_count; - mi_segment_page_remove(prev, tld); + mi_segment_page_delete(prev, tld); slice = prev; } } @@ -606,6 +609,7 @@ static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tl // and add the new free page mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); mi_assert_expensive(mi_segment_is_valid(segment,tld)); + return slice; } @@ -615,7 +619,7 @@ static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tl static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { +static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->block_size > 0); mi_assert_internal(mi_page_all_free(page)); mi_segment_t* segment = _mi_ptr_segment(page); @@ -643,7 +647,7 @@ static void mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { page->block_size = 1; // and free it - mi_segment_page_free_coalesce(page, tld); + return mi_segment_page_free_coalesce(page, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -682,8 +686,20 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) 
{ mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - - // all pages in the segment are abandoned; add it to the abandoned list + + // remove the free pages from our lists + mi_slice_t* slice = &segment->slices[0]; + while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (slice->block_size == 0) { // a free page + mi_segment_page_delete(slice,tld); + slice->block_size = 0; // but keep it free + } + slice = slice + slice->slice_count; + } + + // add it to the abandoned list segment->thread_id = 0; do { segment->abandoned_next = (mi_segment_t*)abandoned; @@ -730,37 +746,50 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_atomic_decrement(&abandoned_count); mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned_next = NULL; + segment->thread_id = _mi_thread_id(); mi_segments_track_size((long)segment->segment_size,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); _mi_stat_decrease(&tld->stats->segments_abandoned,1); mi_slice_t* slice = &segment->slices[0]; - while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page + slice = slice + slice->slice_count; // skip the first segment allocated page + while (slice <= mi_segment_last_slice(segment)) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); mi_page_t* page = mi_slice_to_page(slice); + if (page->block_size == 0) { // a free page, add it to our lists + mi_segment_page_add_free(page,tld); + } slice = slice + slice->slice_count; - if (page->block_size > 0) { // a page in use - segment->abandoned--; + } + + slice = &segment->slices[0]; + mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page + slice = slice + slice->slice_count; // skip the first segment allocated page + while (slice <= mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + mi_page_t* page = mi_slice_to_page(slice); + if (page->block_size > 0) { // a used page mi_assert_internal(page->next == NULL && page->prev==NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + segment->abandoned--; if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(page, tld); + slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing } else { // otherwise reclaim it mi_page_init_flags(page, segment->thread_id); _mi_page_reclaim(heap, page); } - } - else { // free range of slices; add to the free pages - mi_segment_page_add_free(page,tld); - } + } + mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); + slice = slice + slice->slice_count; } mi_assert(segment->abandoned == 0); - segment->thread_id = _mi_thread_id(); // only now for valid checks if (segment->used == 0) { // due to page_clear mi_segment_free(segment,false,tld); } @@ -917,6 +946,11 @@ static bool mi_is_valid_pointer(const void* p) { return (_mi_segment_of(p) != NULL); } +bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* // Return the full segment range belonging to a pointer static void* mi_segment_range_of(const void* p, size_t* size) { mi_segment_t* segment = _mi_segment_of(p); @@ -929,8 +963,6 @@ static 
void* mi_segment_range_of(const void* p, size_t* size) { return segment; } } +*/ -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return mi_is_valid_pointer(p); -} diff --git a/src/static.c b/src/static.c index f1656fa9..df906e04 100644 --- a/src/static.c +++ b/src/static.c @@ -15,7 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file // functions (on Unix's). #include "stats.c" #include "os.c" -#include "memory.c" +//#include "memory.c" #include "segment.c" #include "page.c" #include "heap.c" diff --git a/src/stats.c b/src/stats.c index a9a022fb..aa0c393b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -231,8 +231,8 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n mi_stat_count_t normal = { 0,0,0,0 }; mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out); mi_stat_print(&normal, "normal", 1, out); + mi_stat_print(&stats->large, "large", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out); mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out); - mi_stat_print(&stats->large, "giant", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &normal, 1); mi_stat_add(&total, &stats->huge, 1); diff --git a/test/main-override-static.c b/test/main-override-static.c index 7f20268a..fce319fb 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -143,7 +143,7 @@ size_t _mi_binx4(size_t bsize) { } size_t _mi_binx8(size_t bsize) { - if (bsize==0) return 0; + if (bsize<=1) return bsize; uint8_t b = mi_bsr32((uint32_t)bsize); if (b <= 2) return bsize; size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; @@ -152,16 +152,20 @@ size_t _mi_binx8(size_t bsize) { void mi_bins() { //printf(" QNULL(1), /* 0 */ \\\n "); - size_t last_bin = 1; - for (size_t bsize = 0; bsize < 8*1024; bsize++) { + size_t last_bin = 0; + size_t min_bsize = 0; + size_t last_bsize = 0; + for (size_t bsize = 1; bsize < 2*1024; bsize++) { size_t size = bsize * 64 * 1024; size_t bin = _mi_binx8(bsize); if (bin != last_bin) { - printf("bsize: %6zd, size: %6zd, bin: %6zd\n", bsize, size, bin); + printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); //printf("QNULL(%6zd), ", wsize); //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); last_bin = bin; + min_bsize = bsize; } + last_bsize = bsize; } } @@ -186,6 +190,7 @@ int main() { //free(p1); //p2 = malloc(32); //mi_free(p2); + mi_collect(true); mi_stats_print(NULL); return 0; } From 6ee248b012a56becf6a52b60a2a461f75c7cc7dd Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 14:40:15 -0700 Subject: [PATCH 003/352] wip: fixing bugs in new segment allocation --- include/mimalloc-internal.h | 7 +------ src/segment.c | 35 ++++++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e8fa1ba1..bb60458f 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -254,12 +254,7 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { return (mi_slice_t*)(p); } -static size_t mi_slice_index(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); - return 
index; -} + // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { diff --git a/src/segment.c b/src/segment.c index b70dc664..5b08154b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -16,6 +16,14 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_map_allocated_at(const mi_segment_t* segment); static void mi_segment_map_freed_at(const mi_segment_t* segment); +static size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + return index; +} + + /* ----------------------------------------------------------- Segment allocation @@ -346,7 +354,7 @@ static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_count); - mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); + mi_page_queue_t* pq = (slice_count > MI_SLICES_PER_SEGMENT ? NULL : mi_page_queue_for(slice_count,tld)); if (slice_count==0) slice_count = 1; mi_assert_internal(slice_index + slice_count - 1 < segment->slice_count); @@ -360,8 +368,9 @@ static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size end->slice_offset = (uint16_t)slice_count - 1; end->block_size = 0; } - // and push it on the free page queue - mi_page_queue_push( pq, mi_slice_to_page(slice) ); + // and push it on the free page queue (if it was not a huge page) + if (pq != NULL) mi_page_queue_push( pq, mi_slice_to_page(slice) ); + else slice->block_size = 0; // mark huge page as free anyways } static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { @@ -408,6 +417,7 @@ static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tl static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); + if (slice->slice_count > MI_SLICES_PER_SEGMENT) return; // huge page mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); mi_page_queue_delete(pq, mi_slice_to_page(slice)); } @@ -508,7 +518,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. - if (slice->block_size == 0) { + if (slice->block_size == 0 && segment->kind != MI_SEGMENT_HUGE) { mi_segment_page_delete(slice, tld); } page_count++; @@ -581,7 +591,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->used > 0); segment->used--; - + // free and coalesce the page mi_slice_t* slice = mi_page_to_slice(page); size_t slice_count = slice->slice_count; @@ -627,7 +637,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - + // reset the page memory to reduce memory pressure? 
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; @@ -812,9 +822,20 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); segment->used = 1; mi_page_t* page = mi_slice_to_page(&segment->slices[0]); - page->slice_count = segment->slice_count; + mi_assert_internal(page->block_size > 0 && page->slice_count > 0); + size_t initial_count = page->slice_count; + page = page + initial_count; + page->slice_count = segment->slice_count - initial_count; page->slice_offset = 0; page->block_size = size; + mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); + // set back pointers + for (size_t i = 1; i < page->slice_count; i++) { + mi_slice_t* slice = (mi_slice_t*)(page + i); + slice->slice_offset = (uint16_t)i; + slice->block_size = 1; + slice->slice_count = 0; + } mi_page_init_flags(page,segment->thread_id); return page; } From f2ba95bc64e3e2a4f1d2054cf15eec66cc3b0db4 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 22:00:42 -0700 Subject: [PATCH 004/352] first working version of new segment allocation --- include/mimalloc-internal.h | 7 +++- src/page.c | 2 ++ src/segment.c | 71 ++++++++++++++++++++++--------------- 3 files changed, 51 insertions(+), 29 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bb60458f..e8fa1ba1 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -254,7 +254,12 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { return (mi_slice_t*)(p); } - +static size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + return index; +} // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { diff --git a/src/page.c b/src/page.c index 17dd98fb..bb205426 100644 --- a/src/page.c +++ b/src/page.c @@ -556,6 +556,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi size_t page_size; _mi_segment_page_start(segment, page, &page_size); page->block_size = block_size; + mi_assert_internal(page->block_size <= page_size); + mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #if MI_SECURE diff --git a/src/segment.c b/src/segment.c index 5b08154b..e6eb0b08 100644 --- a/src/segment.c +++ b/src/segment.c @@ -16,12 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_map_allocated_at(const mi_segment_t* segment); static void mi_segment_map_freed_at(const mi_segment_t* segment); -static size_t mi_slice_index(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); - return index; -} + /* ----------------------------------------------------------- @@ -158,16 +153,18 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { while(slice < &segment->slices[segment->slice_count]) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); + size_t index = mi_slice_index(slice); + size_t maxindex = (index + slice->slice_count >= segment->slice_count ? 
segment->slice_count : index + slice->slice_count) - 1; if (slice->block_size > 0) { // a page in use, all slices need their back offset set used_count++; - for (size_t i = 0; i < slice->slice_count; i++) { - mi_assert_internal((slice+i)->slice_offset == i); - mi_assert_internal(i==0 || (slice+i)->slice_count == 0); - mi_assert_internal(i==0 || (slice+i)->block_size == 1); + for (size_t i = index; i <= maxindex; i++) { + mi_assert_internal(segment->slices[i].slice_offset == i - index); + mi_assert_internal(i==index || segment->slices[i].slice_count == 0); + mi_assert_internal(i==index || segment->slices[i].block_size == 1); } } else { // free range of slices; only last slice needs a valid back offset - mi_slice_t* end = slice + slice->slice_count - 1; + mi_slice_t* end = &segment->slices[maxindex]; mi_assert_internal(slice == end - end->slice_offset); mi_assert_internal(slice == end || end->slice_count == 0 ); mi_assert_internal(end->block_size == 0); @@ -176,7 +173,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_page_queue_contains(pq,mi_slice_to_page(slice))); } } - slice = slice + slice->slice_count; + slice = &segment->slices[maxindex+1]; } mi_assert_internal(slice == &segment->slices[segment->slice_count]); mi_assert_internal(used_count == segment->used + 1); @@ -239,7 +236,8 @@ static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_si ; if (info_size != NULL) *info_size = isize; if (pre_size != NULL) *pre_size = isize + guardsize; - size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_SEGMENT_SLICE_SIZE) ); + isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); + size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); return segment_size; } @@ -261,14 +259,14 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { } -static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { +static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; mi_segment_map_freed_at(segment); - mi_segments_track_size(-((long)segment_size),tld); + mi_segments_track_size(-((long)segment->segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_os_free(segment, segment_size, /*segment->memid,*/ tld->stats); + _mi_os_free(segment, segment->segment_size, /*segment->memid,*/ tld->stats); } @@ -301,7 +299,7 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { mi_segment_t* segment = mi_segment_cache_pop(0,tld); mi_assert_internal(segment != NULL); - if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); + if (segment != NULL) mi_segment_os_free(segment, tld); } return true; } @@ -326,7 +324,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) void _mi_segment_thread_collect(mi_segments_tld_t* tld) { mi_segment_t* segment; while ((segment = mi_segment_cache_pop(0,tld)) != NULL) { - mi_segment_os_free(segment, segment->segment_size, tld); + mi_segment_os_free(segment, tld); } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); @@ 
-347,6 +345,10 @@ static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { return &segment->slices[segment->slice_count-1]; } +static size_t mi_slices_in(size_t size) { + return (size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; +} + /* ----------------------------------------------------------- Page management ----------------------------------------------------------- */ @@ -354,7 +356,7 @@ static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_count); - mi_page_queue_t* pq = (slice_count > MI_SLICES_PER_SEGMENT ? NULL : mi_page_queue_for(slice_count,tld)); + mi_page_queue_t* pq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_page_queue_for(slice_count,tld)); if (slice_count==0) slice_count = 1; mi_assert_internal(slice_index + slice_count - 1 < segment->slice_count); @@ -387,6 +389,7 @@ static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segmen mi_assert_internal(page->block_size > 0); // no more in free queue if (page->slice_count <= slice_count) return; mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; size_t next_count = page->slice_count - slice_count; mi_segment_page_init( segment, next_index, next_count, tld ); @@ -394,6 +397,7 @@ static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segmen } static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_SIZE_MAX); // search from best fit up mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); if (slice_count == 0) slice_count = 1; @@ -417,7 +421,7 @@ static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tl static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); - if (slice->slice_count > MI_SLICES_PER_SEGMENT) return; // huge page + mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); mi_page_queue_delete(pq, mi_slice_to_page(slice)); } @@ -434,8 +438,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m size_t info_size; size_t pre_size; size_t segment_size = mi_segment_size(required, &pre_size, &info_size); - size_t slice_count = segment_size / MI_SEGMENT_SLICE_SIZE; - mi_assert_internal(segment_size >= required); + size_t slice_count = mi_slices_in(segment_size); + if (slice_count > MI_SLICES_PER_SEGMENT) slice_count = MI_SLICES_PER_SEGMENT; + mi_assert_internal(segment_size - _mi_align_up(sizeof(mi_segment_t),MI_SEGMENT_SLICE_SIZE) >= required); + mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); // Try to get it from our thread local cache first @@ -514,7 +520,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // Remove the free pages mi_slice_t* slice = &segment->slices[0]; size_t page_count = 0; - while (slice < mi_segment_last_slice(segment)) { + while (slice <= mi_segment_last_slice(segment)) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); 
mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. @@ -534,7 +540,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } else { // otherwise return it to the OS - mi_segment_os_free(segment, segment->segment_size, tld); + mi_segment_os_free(segment, tld); } } @@ -657,7 +663,15 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld page->block_size = 1; // and free it - return mi_segment_page_free_coalesce(page, tld); + if (segment->kind != MI_SEGMENT_HUGE) { + return mi_segment_page_free_coalesce(page, tld); + } + else { + mi_assert_internal(segment->used == 1); + segment->used--; + page->block_size = 0; // pretend free + return mi_page_to_slice(page); + } } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -699,7 +713,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the free pages from our lists mi_slice_t* slice = &segment->slices[0]; - while (slice < mi_segment_last_slice(segment)) { + while (slice <= mi_segment_last_slice(segment)) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); if (slice->block_size == 0) { // a free page @@ -825,12 +839,13 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_assert_internal(page->block_size > 0 && page->slice_count > 0); size_t initial_count = page->slice_count; page = page + initial_count; - page->slice_count = segment->slice_count - initial_count; + page->slice_count = (segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE; page->slice_offset = 0; page->block_size = size; mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); + mi_assert_internal(page->slice_count >= segment->slice_count - initial_count); // set back pointers - for (size_t i = 1; i < page->slice_count; i++) { + for (size_t i = 1; i slice_count; i++) { mi_slice_t* slice = (mi_slice_t*)(page + i); slice->slice_offset = (uint16_t)i; slice->block_size = 1; From a0b4ac2f66f36a117b69ec3d45b55b771fdbecbc Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 23:19:52 -0700 Subject: [PATCH 005/352] new segment allocation; good results with Qas service --- include/mimalloc-internal.h | 22 ++++++++++---------- include/mimalloc-types.h | 8 ++++---- src/page.c | 4 ++-- src/segment.c | 40 +++++++++++++++++++++---------------- 4 files changed, 40 insertions(+), 34 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e8fa1ba1..3aee4ae1 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -254,20 +254,21 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { return (mi_slice_t*)(p); } -static size_t mi_slice_index(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); - return index; -} - // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { - mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || page == mi_slice_to_page(&segment->slices[mi_slice_index(mi_page_to_slice((mi_page_t*)page))])); + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment == NULL || (mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_count); return segment; } +static inline 
mi_slice_t* mi_slice_first(const mi_slice_t* slice) { + mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); + mi_assert_internal(start >= _mi_ptr_segment(slice)->slices); + mi_assert_internal(start->slice_offset == 0); + mi_assert_internal(start + start->slice_count > slice); + return start; +} + // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; @@ -275,8 +276,7 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; mi_assert_internal(idx < segment->slice_count); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; - mi_slice_t* slice = slice0 - slice0->slice_offset; // adjust to the block that holds the page data - mi_assert_internal(slice->slice_count > slice0->slice_offset); + mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count); return mi_slice_to_page(slice); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f4042e60..7a240b7e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -78,7 +78,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 512kb +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 1024kb // Derived constants @@ -90,7 +90,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SMALL_PAGE_SIZE (1<thread_free) != NULL) { // quick test to avoid an atomic operation _mi_page_thread_free_collect(page); @@ -703,7 +703,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { General allocation ----------------------------------------------------------- */ -// A huge page is allocated directly without being in a queue +// Large and huge pages are allocated directly without being in a queue static mi_page_t* mi_large_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t); mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); diff --git a/src/segment.c b/src/segment.c index e6eb0b08..fd16e2e9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -74,6 +74,13 @@ static size_t mi_slice_bin(size_t slice_count) { return bin; } +static size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + return index; +} + /* ----------------------------------------------------------- Page Queues @@ -98,7 +105,7 @@ static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) } */ -static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { +static void mi_page_queue_enqueue(mi_page_queue_t* pq, mi_page_t* page) { // todo: or push to the end? 
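The mi_slice_first helper above is the key to interior-pointer lookups: every non-head slice stores, in slice_offset, the byte distance back to the head slice of its span. A standalone toy model of that back-pointer walk, with hypothetical demo types and names, assuming the byte-offset encoding this commit switches to:

/* resolve an interior slice to the head slice of its span (sketch only) */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct demo_slice_s {
  uint32_t slice_count;    // > 0 only on the head slice of a span
  uint32_t slice_offset;   // byte distance back to the head slice
} demo_slice_t;

static demo_slice_t* demo_slice_first(demo_slice_t* slice) {
  demo_slice_t* start = (demo_slice_t*)((uint8_t*)slice - slice->slice_offset);
  assert(start->slice_offset == 0 && start->slice_count > 0);
  return start;
}

int main(void) {
  demo_slice_t slices[4] = {{0, 0}};
  slices[0].slice_count = 4;   // one span covering all four slices
  for (uint32_t i = 1; i < 4; i++) {
    slices[i].slice_offset = (uint32_t)(i * sizeof(demo_slice_t));
  }
  printf("head index: %td\n", demo_slice_first(&slices[3]) - slices);  // 0
  return 0;
}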
mi_assert_internal(page->prev == NULL && page->next==NULL); page->prev = NULL; // paranoia @@ -158,14 +165,14 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { if (slice->block_size > 0) { // a page in use, all slices need their back offset set used_count++; for (size_t i = index; i <= maxindex; i++) { - mi_assert_internal(segment->slices[i].slice_offset == i - index); + mi_assert_internal(segment->slices[i].slice_offset == (i - index)*sizeof(mi_page_t)); mi_assert_internal(i==index || segment->slices[i].slice_count == 0); mi_assert_internal(i==index || segment->slices[i].block_size == 1); } } else { // free range of slices; only last slice needs a valid back offset mi_slice_t* end = &segment->slices[maxindex]; - mi_assert_internal(slice == end - end->slice_offset); + mi_assert_internal((uint8_t*)slice == (uint8_t*)end - end->slice_offset); mi_assert_internal(slice == end || end->slice_count == 0 ); mi_assert_internal(end->block_size == 0); if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { @@ -272,7 +279,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, // and no more than 1. -#define MI_SEGMENT_CACHE_MAX (2) +#define MI_SEGMENT_CACHE_MAX (4) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -362,16 +369,16 @@ static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size // set first and last slice (the intermediates can be undetermined) mi_slice_t* slice = &segment->slices[slice_index]; - slice->slice_count = slice_count; + slice->slice_count = (uint32_t)slice_count; slice->slice_offset = 0; if (slice_count > 1) { mi_slice_t* end = &segment->slices[slice_index + slice_count - 1]; end->slice_count = 0; - end->slice_offset = (uint16_t)slice_count - 1; + end->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); end->block_size = 0; } // and push it on the free page queue (if it was not a huge page) - if (pq != NULL) mi_page_queue_push( pq, mi_slice_to_page(slice) ); + if (pq != NULL) mi_page_queue_enqueue( pq, mi_slice_to_page(slice) ); else slice->block_size = 0; // mark huge page as free anyways } @@ -393,7 +400,7 @@ static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segmen size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; size_t next_count = page->slice_count - slice_count; mi_segment_page_init( segment, next_index, next_count, tld ); - page->slice_count = slice_count; + page->slice_count = (uint32_t)slice_count; } static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { @@ -494,11 +501,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m for (size_t i = 0; i < islice_count; i++) { mi_slice_t* slice = &segment->slices[i]; if (i==0) { - slice->slice_count = islice_count; + slice->slice_count = (uint32_t)islice_count; slice->block_size = islice_count * MI_SEGMENT_SLICE_SIZE; } else { - slice->slice_offset = (uint16_t)i; + slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); slice->block_size = 1; } } @@ -553,7 +560,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require mi_assert_internal(required <= MI_LARGE_SIZE_MAX && page_kind <= MI_PAGE_LARGE); // find a free page - size_t page_size = _mi_align_up(required,MI_SEGMENT_SLICE_SIZE); + size_t page_size = _mi_align_up(required,(required > 
MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); if (page==NULL) { @@ -569,7 +576,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require bool commit = false; bool unreset = false; for (size_t i = 0; i < page->slice_count; i++, slice++) { - slice->slice_offset = (uint16_t)i; + slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); slice->block_size = 1; if (i > 0) slice->slice_count = 0; if (!segment->all_committed && !slice->is_committed) { @@ -610,8 +617,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl mi_segment_page_delete(next, tld); } if (slice > segment->slices) { - mi_slice_t* prev = slice - 1; - prev = prev - prev->slice_offset; + mi_slice_t* prev = mi_slice_first(slice - 1); mi_assert_internal(prev >= segment->slices); if (prev->block_size==0) { // free previous slice -- remove it from free and merge @@ -653,7 +659,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld } // zero the page data - size_t slice_count = page->slice_count; // don't clear the slice_count + uint32_t slice_count = page->slice_count; // don't clear the slice_count bool is_reset = page->is_reset; // don't clear the reset flag bool is_committed = page->is_committed; // don't clear the commit flag memset(page, 0, sizeof(*page)); @@ -839,7 +845,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_assert_internal(page->block_size > 0 && page->slice_count > 0); size_t initial_count = page->slice_count; page = page + initial_count; - page->slice_count = (segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE; + page->slice_count = (uint32_t)((segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE); page->slice_offset = 0; page->block_size = size; mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); @@ -847,7 +853,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld // set back pointers for (size_t i = 1; i slice_count; i++) { mi_slice_t* slice = (mi_slice_t*)(page + i); - slice->slice_offset = (uint16_t)i; + slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); slice->block_size = 1; slice->slice_count = 0; } From 91497e8d2d797ab8b9d55c9618b36a9f3f88dcd9 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 16 Aug 2019 17:49:49 -0700 Subject: [PATCH 006/352] whitespace and warning fix --- include/mimalloc-internal.h | 18 ++++----- src/segment.c | 78 ++++++++++++++++++------------------- 2 files changed, 47 insertions(+), 49 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 3aee4ae1..69c150c2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -17,7 +17,7 @@ terms of the MIT license. A copy of the license can be found in the file #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else -#define mi_trace_message(...) +#define mi_trace_message(...) 
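To make the size rounding at the top of mi_segment_page_alloc concrete: requests up to a medium page are rounded to whole slices, larger ones to whole medium pages, which bounds the number of distinct span sizes. A small sketch with assumed values (64KiB slices, 512KiB medium pages; align_up_demo mirrors _mi_align_up but is not the patch's code):

/* rounding of a required size to a page/span size (sketch only) */
#include <stdio.h>
#include <stddef.h>

#define DEMO_SLICE_SIZE  ((size_t)64 * 1024)
#define DEMO_MEDIUM_SIZE ((size_t)512 * 1024)

static size_t align_up_demo(size_t n, size_t alignment) {
  return ((n + alignment - 1) / alignment) * alignment;
}

static size_t demo_page_size_for(size_t required) {
  return align_up_demo(required,
      required > DEMO_MEDIUM_SIZE ? DEMO_MEDIUM_SIZE : DEMO_SLICE_SIZE);
}

int main(void) {
  printf("%zu KiB\n", demo_page_size_for(100 * 1024) / 1024);  // 128 KiB
  printf("%zu KiB\n", demo_page_size_for(600 * 1024) / 1024);  // 1024 KiB
  return 0;
}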
#endif @@ -144,8 +144,8 @@ bool _mi_page_is_valid(mi_page_t* page); Inlined definitions ----------------------------------------------------------- */ #define UNUSED(x) (void)(x) -#if (MI_DEBUG>0) -#define UNUSED_RELEASE(x) +#if (MI_DEBUG>0) +#define UNUSED_RELEASE(x) #else #define UNUSED_RELEASE(x) UNUSED(x) #endif @@ -256,13 +256,13 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { - mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || (mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_count); + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_count)); return segment; } static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { - mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); + mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); mi_assert_internal(start >= _mi_ptr_segment(slice)->slices); mi_assert_internal(start->slice_offset == 0); mi_assert_internal(start + start->slice_count > slice); @@ -275,8 +275,8 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; mi_assert_internal(idx < segment->slice_count); - mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; - mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data + mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; + mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count); return mi_slice_to_page(slice); @@ -354,7 +354,7 @@ static inline uintptr_t mi_page_thread_id(const mi_page_t* page) { } static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) { - page->flags = thread_id; + page->flags = thread_id; } static inline bool mi_page_is_in_full(const mi_page_t* page) { diff --git a/src/segment.c b/src/segment.c index fd16e2e9..31fbccf9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -21,7 +21,7 @@ static void mi_segment_map_freed_at(const mi_segment_t* segment); /* ----------------------------------------------------------- Segment allocation - + In any case the memory for a segment is virtual and only committed on demand (i.e. 
we are careful to not touch the memory @@ -113,7 +113,7 @@ static void mi_page_queue_enqueue(mi_page_queue_t* pq, mi_page_t* page) { pq->first = page; if (page->next != NULL) page->next->prev = page; else pq->last = page; - page->block_size = 0; // free + page->block_size = 0; // free } static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { @@ -141,7 +141,7 @@ static void mi_page_queue_delete(mi_page_queue_t* pq, mi_page_t* page) { ----------------------------------------------------------- */ #if (MI_DEBUG > 1) -static bool mi_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { +static bool mi_segment_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { for (mi_page_t* p = pq->first; p != NULL; p = p->next) { if (p==page) return true; } @@ -159,7 +159,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_page_queue_t* pq; while(slice < &segment->slices[segment->slice_count]) { mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(slice->slice_offset == 0); size_t index = mi_slice_index(slice); size_t maxindex = (index + slice->slice_count >= segment->slice_count ? segment->slice_count : index + slice->slice_count) - 1; if (slice->block_size > 0) { // a page in use, all slices need their back offset set @@ -177,9 +177,9 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(end->block_size == 0); if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { pq = mi_page_queue_for(slice->slice_count,tld); - mi_assert_internal(mi_page_queue_contains(pq,mi_slice_to_page(slice))); + mi_assert_internal(mi_segment_page_queue_contains(pq,mi_slice_to_page(slice))); } - } + } slice = &segment->slices[maxindex+1]; } mi_assert_internal(slice == &segment->slices[segment->slice_count]); @@ -193,7 +193,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Start of the page available memory; can be used on uninitialized pages -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); ptrdiff_t idx = slice - segment->slices; @@ -233,7 +233,7 @@ static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_si size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; - + if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) @@ -296,7 +296,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t } static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->cache_count < MI_SEGMENT_CACHE_MAX + if (tld->cache_count < MI_SEGMENT_CACHE_MAX && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) ) { // always allow 1 element cache return false; @@ -339,7 +339,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { /* ----------------------------------------------------------- - Slices + Slices ----------------------------------------------------------- */ @@ -399,11 +399,11 @@ static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segmen 
mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; size_t next_count = page->slice_count - slice_count; - mi_segment_page_init( segment, next_index, next_count, tld ); + mi_segment_page_init( segment, next_index, next_count, tld ); page->slice_count = (uint32_t)slice_count; } -static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { +static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_SIZE_MAX); // search from best fit up mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); @@ -422,8 +422,8 @@ static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tl } pq++; } - // could not find a page.. - return NULL; + // could not find a page.. + return NULL; } static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { @@ -452,7 +452,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) + bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) || required > 0; // huge page mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment==NULL) { @@ -482,7 +482,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m _mi_os_protect((uint8_t*)segment + info_size, (pre_size - info_size)); size_t os_page_size = _mi_os_page_size(); // and protect the last page too - _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); slice_count--; // don't use the last slice :-( } @@ -519,7 +519,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - mi_assert_internal(segment != NULL); + mi_assert_internal(segment != NULL); mi_assert_internal(segment->next == NULL); mi_assert_internal(segment->prev == NULL); mi_assert_internal(segment->used == 0); @@ -541,7 +541,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // stats _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -555,7 +555,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t Page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(required <= MI_LARGE_SIZE_MAX && page_kind <= MI_PAGE_LARGE); @@ -565,7 +565,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again - if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM + if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM return mi_segment_page_alloc(page_kind, required, tld, os_tld); } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); @@ -581,11 +581,11 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require if (i > 0) slice->slice_count = 0; if (!segment->all_committed && !slice->is_committed) { slice->is_committed = true; - commit = true; + commit = true; } if (slice->is_reset) { slice->is_reset = false; - unreset = true; + unreset = true; } } uint8_t* page_start = mi_slice_start(mi_page_to_slice(page)); @@ -593,7 +593,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require if(unreset){ _mi_os_unreset(page_start, page_size, tld->stats); } // initialize the page and return - mi_assert_internal(segment->thread_id == _mi_thread_id()); + mi_assert_internal(segment->thread_id == _mi_thread_id()); segment->used++; mi_page_init_flags(page, segment->thread_id); return page; @@ -604,7 +604,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->used > 0); segment->used--; - + // free and coalesce the page mi_slice_t* slice = mi_page_to_slice(page); size_t slice_count = slice->slice_count; @@ -617,7 +617,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl mi_segment_page_delete(next, tld); } if (slice > segment->slices) { - mi_slice_t* prev = mi_slice_first(slice - 1); + mi_slice_t* prev = mi_slice_first(slice - 1); mi_assert_internal(prev >= segment->slices); if (prev->block_size==0) { // free previous slice -- remove it from free and merge @@ -627,7 +627,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl slice = prev; } } - + // and add the new free page mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); mi_assert_expensive(mi_segment_is_valid(segment,tld)); @@ -649,7 +649,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - + // reset the page memory to reduce memory pressure? 
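One detail worth spelling out from the allocation path above: commit and unreset decisions are accumulated per slice, and the actual OS call is then issued once for the whole page range. A standalone sketch of that accumulation (demo types and names are hypothetical, not the patch's API):

/* accumulate per-slice commit/unreset needs for one page range (sketch only) */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct demo_slice_state_s {
  bool is_committed;
  bool is_reset;
} demo_slice_state_t;

static void demo_plan_page_ops(demo_slice_state_t* slices, size_t count,
                               bool all_committed,
                               bool* do_commit, bool* do_unreset) {
  *do_commit = false;
  *do_unreset = false;
  for (size_t i = 0; i < count; i++) {
    if (!all_committed && !slices[i].is_committed) {
      slices[i].is_committed = true;
      *do_commit = true;     // at least one slice still needs committing
    }
    if (slices[i].is_reset) {
      slices[i].is_reset = false;
      *do_unreset = true;    // at least one slice had been reset
    }
  }
}

int main(void) {
  demo_slice_state_t s[3] = { {true, false}, {false, true}, {false, false} };
  bool commit, unreset;
  demo_plan_page_ops(s, 3, false, &commit, &unreset);
  printf("commit=%d unreset=%d\n", commit, unreset);   // commit=1 unreset=1
  return 0;
}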
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; @@ -696,7 +696,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) else if (segment->used == segment->abandoned) { // only abandoned pages; remove from free list and abandon mi_segment_abandon(segment,tld); - } + } } @@ -718,7 +718,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the free pages from our lists - mi_slice_t* slice = &segment->slices[0]; + mi_slice_t* slice = &segment->slices[0]; while (slice <= mi_segment_last_slice(segment)) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); @@ -807,14 +807,14 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen segment->abandoned--; if (mi_page_all_free(page)) { // if everything free by now, free the page - slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing + slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing } else { // otherwise reclaim it mi_page_init_flags(page, segment->thread_id); _mi_page_reclaim(heap, page); } - } + } mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); slice = slice + slice->slice_count; } @@ -824,7 +824,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_free(segment,false,tld); } else { - reclaimed++; + reclaimed++; } } return (reclaimed>0); @@ -847,15 +847,15 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld page = page + initial_count; page->slice_count = (uint32_t)((segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE); page->slice_offset = 0; - page->block_size = size; + page->block_size = size; mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); mi_assert_internal(page->slice_count >= segment->slice_count - initial_count); - // set back pointers + // set back pointers for (size_t i = 1; i slice_count; i++) { mi_slice_t* slice = (mi_slice_t*)(page + i); slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); slice->block_size = 1; - slice->slice_count = 0; + slice->slice_count = 0; } mi_page_init_flags(page,segment->thread_id); return page; @@ -899,7 +899,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ ----------------------------------------------------------- */ #if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)1 << 44) // 16TB +#define MI_MAX_ADDRESS ((size_t)1 << 44) // 16TB #else #define MI_MAX_ADDRESS ((size_t)1 << 31) // 2Gb #endif @@ -911,7 +911,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 1KiB per TB with 128MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB? + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB? 
uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; *bitidx = segindex % (8*MI_INTPTR_SIZE); return (segindex / (8*MI_INTPTR_SIZE)); @@ -953,7 +953,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { return segment; // yes, allocated by us } if (index==0) return NULL; - // search downwards for the first segment in case it is an interior pointer + // search downwards for the first segment in case it is an interior pointer // could be slow but searches in 256MiB steps trough valid huge objects // note: we could maintain a lowest index to speed up the path for invalid pointers? size_t lobitidx; @@ -1006,5 +1006,3 @@ static void* mi_segment_range_of(const void* p, size_t* size) { } } */ - - From cce38bc147b6290607d086bc1126946dd0cf9ab1 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 24 Aug 2019 07:32:23 -0700 Subject: [PATCH 007/352] more conservative setting to avoid internal fragmentation --- include/mimalloc-types.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9a482aff..77e9e4ab 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -78,7 +78,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 1024kb +#define MI_MEDIUM_PAGE_SHIFT ( 2 + MI_SEGMENT_SLICE_SHIFT) // 512kb // Derived constants @@ -90,12 +90,12 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SMALL_PAGE_SIZE (1<0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0) - void* padding[1]; // 12 words on 64-bit in secure mode, 12 words on 32-bit plain + // without padding: 11 words on 64-bit, 13 on 32-bit. Secure adds one word + #if (MI_SECURE==0) + void* padding[1]; // 12 words on 64-bit, 14 words on 32-bit #endif } mi_page_t; @@ -212,15 +212,14 @@ typedef mi_page_t mi_slice_t; // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - struct mi_segment_s* next; - struct mi_segment_s* prev; - volatile struct mi_segment_s* abandoned_next; - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t used; // count of pages in use - size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` + struct mi_segment_s* next; // the list of freed segments in the cache + volatile struct mi_segment_s* abandoned_next; // the list of abandoned segments + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t used; // count of pages in use + size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. 
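The segment map above keeps one bit per possible segment-aligned address below MI_MAX_ADDRESS, so a segment pointer reduces to a word index plus a bit index. A standalone sketch of that arithmetic, assuming the 16TB cap and 64MiB segment size suggested by the constants in this series (names are illustrative):

/* word/bit addressing into a segment bitmap (sketch only) */
#include <stdint.h>
#include <stdio.h>

#define DEMO_MAX_ADDRESS   ((uintptr_t)1 << 44)   // 16TB
#define DEMO_SEGMENT_SIZE  ((uintptr_t)1 << 26)   // 64MiB
#define DEMO_BITS_PER_WORD (8 * sizeof(uintptr_t))

static size_t demo_map_index_of(uintptr_t segment_addr, size_t* bitidx) {
  uintptr_t segindex = (segment_addr % DEMO_MAX_ADDRESS) / DEMO_SEGMENT_SIZE;
  *bitidx = (size_t)(segindex % DEMO_BITS_PER_WORD);
  return (size_t)(segindex / DEMO_BITS_PER_WORD);
}

int main(void) {
  size_t bitidx;
  size_t word = demo_map_index_of((uintptr_t)3 * DEMO_SEGMENT_SIZE, &bitidx);
  printf("word %zu, bit %zu\n", word, bitidx);   // word 0, bit 3
  return 0;
}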
- uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - size_t memid; // id for the os-level memory manager + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + size_t memid; // id for the os-level memory manager bool all_committed; // layout like this to optimize access in `mi_free` From 612b2cc9b764783dbb3b52aeb47b35eab405e6db Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 24 Aug 2019 12:20:32 -0700 Subject: [PATCH 008/352] clean up segment slice handling --- include/mimalloc-internal.h | 6 +- include/mimalloc-types.h | 44 ++- src/init.c | 39 ++- src/segment.c | 592 ++++++++++++++++++------------------ test/test-stress.c | 6 +- 5 files changed, 352 insertions(+), 335 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 7566baa0..bf5c2e04 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -259,7 +259,7 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_count)); + mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries)); return segment; } @@ -276,11 +276,11 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx < segment->slice_count); + mi_assert_internal(idx < segment->slice_entries); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); - mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count); + mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries); return mi_slice_to_page(slice); } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 77e9e4ab..78b643ad 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -78,7 +78,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 2 + MI_SEGMENT_SLICE_SHIFT) // 512kb +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb // Derived constants @@ -109,6 +109,9 @@ terms of the MIT license. A copy of the license can be found in the file #error "define more bins" #endif +// Maximum slice offset (7) +#define MI_MAX_SLICE_OFFSET ((MI_MEDIUM_PAGE_SIZE / MI_SEGMENT_SLICE_SIZE) - 1) + typedef uintptr_t mi_encoded_t; // free lists contain blocks @@ -206,6 +209,12 @@ typedef enum mi_segment_kind_e { MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. 
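For reference, the MI_MAX_SLICE_OFFSET introduced above works out to 7 with the page sizes used in this series: a medium page spans 8 slices, so at most 7 interior slices sit between an address and the head slice of a used page. A trivial check under those assumed sizes:

/* MI_MAX_SLICE_OFFSET arithmetic under assumed sizes (sketch only) */
#include <stdio.h>
#include <stddef.h>

#define DEMO_SLICE_SIZE       ((size_t)64 * 1024)
#define DEMO_MEDIUM_PAGE_SIZE ((size_t)512 * 1024)
#define DEMO_MAX_SLICE_OFFSET ((DEMO_MEDIUM_PAGE_SIZE / DEMO_SLICE_SIZE) - 1)

int main(void) {
  printf("max slice offset: %zu\n", (size_t)DEMO_MAX_SLICE_OFFSET);  // 7
  return 0;
}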
} mi_segment_kind_t; +#define MI_COMMIT_SIZE ((size_t)2 << 20) // OS large page size + +#if ((MI_SEGMENT_SIZE / MI_COMMIT_SIZE) > MI_INTPTR_SIZE) +#error "not enough commit bits to cover the segment size" +#endif + typedef mi_page_t mi_slice_t; // Segments are large allocated memory blocks (2mb on 64 bit) from @@ -214,18 +223,21 @@ typedef mi_page_t mi_slice_t; typedef struct mi_segment_s { struct mi_segment_s* next; // the list of freed segments in the cache volatile struct mi_segment_s* abandoned_next; // the list of abandoned segments - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t used; // count of pages in use - size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` - size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - size_t memid; // id for the os-level memory manager - bool all_committed; + + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t used; // count of pages in use + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + + size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` + size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. + + bool allow_decommit; + uintptr_t commit_mask; // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; uintptr_t thread_id; - size_t slice_count; // slices in this segment (at most MI_SLICES_PER_SEGMENT) + size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` mi_slice_t slices[MI_SLICES_PER_SEGMENT]; } mi_segment_t; @@ -371,17 +383,19 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // Thread Local data // ------------------------------------------------------ -// Queue of segments -typedef struct mi_segment_queue_s { - mi_segment_t* first; - mi_segment_t* last; -} mi_segment_queue_t; +// A "span" is is an available range of slices. The span queues keep +// track of slice spans of at most the given `slice_count` (but more than the previous size class). 
+typedef struct mi_span_queue_s { + mi_slice_t* first; + mi_slice_t* last; + size_t slice_count; +} mi_span_queue_t; #define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) // Segments thread local data typedef struct mi_segments_tld_s { - mi_page_queue_t pages[MI_SEGMENT_BIN_MAX+1]; // free pages inside segments + mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/init.c b/src/init.c index b0fa60cc..d4ba5572 100644 --- a/src/init.c +++ b/src/init.c @@ -21,7 +21,7 @@ const mi_page_t _mi_page_empty = { 0, // used NULL, 0, 0, 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0) + #if (MI_SECURE==0) , { NULL } // padding #endif }; @@ -68,6 +68,18 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() + +// Empty slice span queues for every bin +#define SQNULL(sz) { NULL, NULL, sz } +#define MI_SEGMENT_SPAN_QUEUES_EMPTY \ + { SQNULL(1), \ + SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ + SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ + SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ + SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ + SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } + + // -------------------------------------------------------- // Statically allocate an empty heap as the initial // thread local value for the default heap, @@ -89,25 +101,26 @@ const mi_heap_t _mi_heap_empty = { false }; +#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats))) + +static const mi_tld_t tld_empty = { + 0, + NULL, + { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_empty_stats }, // segments + { 0, tld_empty_stats }, // os + { MI_STATS_NULL } // stats +}; + mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; -// Empty page queues for every bin -#define SQNULL(sz) { NULL, NULL, sz } -#define MI_SEGMENT_PAGE_QUEUES_EMPTY \ - { SQNULL(1), \ - SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ - SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ - SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ - SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ - SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) static mi_tld_t tld_main = { 0, &_mi_heap_main, - { MI_SEGMENT_PAGE_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -223,12 +236,12 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; + memcpy(tld, &tld_empty, sizeof(*tld)); memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); 
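One way to picture the MI_SEGMENT_SPAN_QUEUES_EMPTY table above: a free span of n slices belongs to the first queue whose size class is at least n, and the best-fit search walks upward from there. The sketch below scans the same size-class table; the patch computes the bin directly via mi_slice_bin rather than scanning, so this is only an illustration:

/* map a slice count to its size-class index by scanning (sketch only) */
#include <stddef.h>
#include <stdio.h>

static const size_t demo_span_bins[] = {
  1, 1, 2, 3, 4, 5, 6, 7, 10,
  12, 14, 16, 20, 24, 28, 32, 40,
  48, 56, 64, 80, 96, 112, 128, 160,
  192, 224, 256, 320, 384, 448, 512, 640,
  768, 896, 1024
};
#define DEMO_BIN_COUNT (sizeof(demo_span_bins)/sizeof(demo_span_bins[0]))

static size_t demo_bin_for(size_t slice_count) {
  size_t bin = 0;
  while (bin + 1 < DEMO_BIN_COUNT && demo_span_bins[bin] < slice_count) bin++;
  return bin;   // first class that can hold `slice_count` slices
}

int main(void) {
  printf("%zu\n", demo_bin_for(9));     // 8  -> class of 10 slices
  printf("%zu\n", demo_bin_for(1024));  // 35 -> largest class
  return 0;
}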
heap->random = _mi_random_init(heap->thread_id); heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; - heap->tld = tld; - memset(tld, 0, sizeof(*tld)); + heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; tld->os.stats = &tld->stats; diff --git a/src/segment.c b/src/segment.c index 64b9f4ac..3b3272a1 100644 --- a/src/segment.c +++ b/src/segment.c @@ -31,6 +31,27 @@ static void mi_segment_map_freed_at(const mi_segment_t* segment); be reclaimed by still running threads, much like work-stealing. ----------------------------------------------------------- */ +/* ----------------------------------------------------------- + Slices +----------------------------------------------------------- */ + +static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) { + return &segment->slices[segment->slice_entries]; +} + +/* +static uint8_t* mi_slice_start(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment)); + return ((uint8_t*)segment + ((slice - segment->slices)*MI_SEGMENT_SLICE_SIZE)); +} + + +static size_t mi_slices_in(size_t size) { + return (size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; +} +*/ + /* ----------------------------------------------------------- Bins ----------------------------------------------------------- */ @@ -67,7 +88,7 @@ static size_t mi_slice_bin8(size_t slice_count) { static size_t mi_slice_bin(size_t slice_count) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); - mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) == MI_SEGMENT_BIN_MAX); + mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) <= MI_SEGMENT_BIN_MAX); size_t bin = (slice_count==0 ? 0 : mi_slice_bin8(slice_count)); mi_assert_internal(bin <= MI_SEGMENT_BIN_MAX); return bin; @@ -76,62 +97,43 @@ static size_t mi_slice_bin(size_t slice_count) { static size_t mi_slice_index(const mi_slice_t* slice) { mi_segment_t* segment = _mi_ptr_segment(slice); ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_entries); return index; } /* ----------------------------------------------------------- - Page Queues + Slice span queues ----------------------------------------------------------- */ -/* -static bool mi_page_queue_is_empty(mi_page_queue_t* pq) { - return (pq->first == NULL); -} -static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) -{ - mi_page_t* page = pq->first; - if (page==NULL) return NULL; - mi_assert_internal(page->prev==NULL); - pq->first = page->next; - if (page->next == NULL) pq->last = NULL; - else page->next->prev = NULL; - page->next = NULL; - page->prev = NULL; // paranoia - page->block_size = 1; // no more free - return page; -} -*/ - -static void mi_page_queue_enqueue(mi_page_queue_t* pq, mi_page_t* page) { +static void mi_span_queue_push(mi_span_queue_t* sq, mi_slice_t* slice) { // todo: or push to the end? 
- mi_assert_internal(page->prev == NULL && page->next==NULL); - page->prev = NULL; // paranoia - page->next = pq->first; - pq->first = page; - if (page->next != NULL) page->next->prev = page; - else pq->last = page; - page->block_size = 0; // free + mi_assert_internal(slice->prev == NULL && slice->next==NULL); + slice->prev = NULL; // paranoia + slice->next = sq->first; + sq->first = slice; + if (slice->next != NULL) slice->next->prev = slice; + else sq->last = slice; + slice->block_size = 0; // free } -static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { +static mi_span_queue_t* mi_span_queue_for(size_t slice_count, mi_segments_tld_t* tld) { size_t bin = mi_slice_bin(slice_count); - mi_page_queue_t* pq = &tld->pages[bin]; - // mi_assert_internal(pq->block_size >= slice_count); - return pq; + mi_span_queue_t* sq = &tld->spans[bin]; + mi_assert_internal(sq->slice_count >= slice_count); + return sq; } -static void mi_page_queue_delete(mi_page_queue_t* pq, mi_page_t* page) { - mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); - // should work too if the queue does not contain page (which can happen during reclaim) - if (page->prev != NULL) page->prev->next = page->next; - if (page == pq->first) pq->first = page->next; - if (page->next != NULL) page->next->prev = page->prev; - if (page == pq->last) pq->last = page->prev; - page->prev = NULL; - page->next = NULL; - page->block_size = 1; // no more free +static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) { + mi_assert_internal(slice->block_size==0 && slice->slice_count>0 && slice->slice_offset==0); + // should work too if the queue does not contain slice (which can happen during reclaim) + if (slice->prev != NULL) slice->prev->next = slice->next; + if (slice == sq->first) sq->first = slice->next; + if (slice->next != NULL) slice->next->prev = slice->prev; + if (slice == sq->last) sq->last = slice->prev; + slice->prev = NULL; + slice->next = NULL; + slice->block_size = 1; // no more free } @@ -140,9 +142,9 @@ static void mi_page_queue_delete(mi_page_queue_t* pq, mi_page_t* page) { ----------------------------------------------------------- */ #if (MI_DEBUG > 1) -static bool mi_segment_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { - for (mi_page_t* p = pq->first; p != NULL; p = p->next) { - if (p==page) return true; +static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) { + for (mi_slice_t* s = sq->first; s != NULL; s = s->next) { + if (s==slice) return true; } return false; } @@ -154,34 +156,42 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); size_t used_count = 0; - mi_page_queue_t* pq; - while(slice < &segment->slices[segment->slice_count]) { + mi_span_queue_t* sq; + while(slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); size_t index = mi_slice_index(slice); - size_t maxindex = (index + slice->slice_count >= segment->slice_count ? segment->slice_count : index + slice->slice_count) - 1; - if (slice->block_size > 0) { // a page in use, all slices need their back offset set + size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? 
segment->slice_entries : index + slice->slice_count) - 1; + if (slice->block_size > 0) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets used_count++; - for (size_t i = index; i <= maxindex; i++) { - mi_assert_internal(segment->slices[i].slice_offset == (i - index)*sizeof(mi_page_t)); - mi_assert_internal(i==index || segment->slices[i].slice_count == 0); - mi_assert_internal(i==index || segment->slices[i].block_size == 1); + for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) { + mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t)); + mi_assert_internal(i==0 || segment->slices[index + i].slice_count == 0); + mi_assert_internal(i==0 || segment->slices[index + i].block_size == 1); + } + // and the last entry as well (for coalescing) + const mi_slice_t* last = slice + slice->slice_count - 1; + if (last > slice && last < mi_segment_slices_end(segment)) { + mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); + mi_assert_internal(last->slice_count == 0); + mi_assert_internal(last->block_size == 1); } } else { // free range of slices; only last slice needs a valid back offset - mi_slice_t* end = &segment->slices[maxindex]; - mi_assert_internal((uint8_t*)slice == (uint8_t*)end - end->slice_offset); - mi_assert_internal(slice == end || end->slice_count == 0 ); - mi_assert_internal(end->block_size == 0); - if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { - pq = mi_page_queue_for(slice->slice_count,tld); - mi_assert_internal(mi_segment_page_queue_contains(pq,mi_slice_to_page(slice))); + mi_slice_t* last = &segment->slices[maxindex]; + mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); + mi_assert_internal(slice == last || last->slice_count == 0 ); + mi_assert_internal(last->block_size == 0); + if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { // segment is not huge or abandonded + sq = mi_span_queue_for(slice->slice_count,tld); + mi_assert_internal(mi_span_queue_contains(sq,slice)); } } slice = &segment->slices[maxindex+1]; } - mi_assert_internal(slice == &segment->slices[segment->slice_count]); + mi_assert_internal(slice == end); mi_assert_internal(used_count == segment->used + 1); return true; } @@ -191,13 +201,20 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { Segment size calculations ----------------------------------------------------------- */ +static size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; +} +static size_t mi_segment_info_size(mi_segment_t* segment) { + return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; +} + // Start of the page available memory; can be used on uninitialized pages uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); - ptrdiff_t idx = slice - segment->slices; - size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; - uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); + const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); + ptrdiff_t idx = slice - segment->slices; + size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); /* if (idx == 0) { // the first page starts after the segment info (and possible guard page) @@ -216,7 +233,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const 
mi_page_t* pa */ long secure = mi_option_get(mi_option_secure); - if (secure > 1 || (secure == 1 && slice == &segment->slices[segment->slice_count - 1])) { + if (secure > 1 || (secure == 1 && slice == &segment->slices[segment->slice_entries - 1])) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page psize -= _mi_os_page_size(); @@ -228,7 +245,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa return p; } -static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_size) { +static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) { size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; @@ -240,12 +257,12 @@ static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_si required = _mi_align_up(required, page_size); } ; - if (info_size != NULL) *info_size = isize; - if (pre_size != NULL) *pre_size = isize + guardsize; + if (pre_size != NULL) *pre_size = isize; isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); - size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); + if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; + size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); - return segment_size; + return (segment_size / MI_SEGMENT_SLICE_SIZE); } @@ -268,11 +285,11 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; mi_segment_map_freed_at(segment); - mi_segments_track_size(-((long)segment->segment_size),tld); + mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (mi_option_is_enabled(mi_option_secure)) { - _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } - _mi_os_free(segment, segment->segment_size, /*segment->memid,*/ tld->stats); + _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); } @@ -282,14 +299,14 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset -static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t* tld) { - if (segment_size != 0 && segment_size != MI_SEGMENT_SIZE) return NULL; +static mi_segment_t* mi_segment_cache_pop(size_t segment_slices, mi_segments_tld_t* tld) { + if (segment_slices != 0 && segment_slices != MI_SLICES_PER_SEGMENT) return NULL; mi_segment_t* segment = tld->cache; if (segment == NULL) return NULL; tld->cache_count--; tld->cache = segment->next; segment->next = NULL; - mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT); _mi_stat_decrease(&tld->stats->segments_cache, 1); return segment; } @@ -312,12 +329,12 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->next == NULL); - if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) 
{ + if (segment->segment_slices != MI_SLICES_PER_SEGMENT || mi_segment_cache_full(tld)) { return false; } - mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT); if (mi_option_is_enabled(mi_option_cache_reset)) { - _mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_os_reset((uint8_t*)segment + mi_segment_info_size(segment), mi_segment_size(segment) - mi_segment_info_size(segment), tld->stats); } segment->next = tld->cache; tld->cache = segment; @@ -337,182 +354,232 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } -/* ----------------------------------------------------------- - Slices ------------------------------------------------------------ */ - - -static uint8_t* mi_slice_start(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - return ((uint8_t*)segment + (mi_slice_index(slice)*MI_SEGMENT_SLICE_SIZE)); -} - -static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { - return &segment->slices[segment->slice_count-1]; -} - -static size_t mi_slices_in(size_t size) { - return (size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; -} /* ----------------------------------------------------------- - Page management + Span management ----------------------------------------------------------- */ - -static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(slice_index < segment->slice_count); - mi_page_queue_t* pq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_page_queue_for(slice_count,tld)); +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_index < segment->slice_entries); + mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_span_queue_for(slice_count,tld)); if (slice_count==0) slice_count = 1; - mi_assert_internal(slice_index + slice_count - 1 < segment->slice_count); + mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries); // set first and last slice (the intermediates can be undetermined) mi_slice_t* slice = &segment->slices[slice_index]; slice->slice_count = (uint32_t)slice_count; + mi_assert_internal(slice->slice_count == slice_count); // no overflow? 
slice->slice_offset = 0; if (slice_count > 1) { - mi_slice_t* end = &segment->slices[slice_index + slice_count - 1]; - end->slice_count = 0; - end->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); - end->block_size = 0; + mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + last->slice_count = 0; + last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); + last->block_size = 0; } // and push it on the free page queue (if it was not a huge page) - if (pq != NULL) mi_page_queue_enqueue( pq, mi_slice_to_page(slice) ); + if (sq != NULL) mi_span_queue_push( sq, slice ); else slice->block_size = 0; // mark huge page as free anyways } -static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { - mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); - size_t slice_index = mi_slice_index(mi_page_to_slice(page)); - mi_segment_page_init(segment,slice_index,page->slice_count,tld); +// called from reclaim to add existing free spans +static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_segment_t* segment = _mi_ptr_segment(slice); + mi_assert_internal(slice->block_size==0 && slice->slice_count>0 && slice->slice_offset==0); + size_t slice_index = mi_slice_index(slice); + mi_segment_span_free(segment,slice_index,slice->slice_count,tld); +} +static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); + mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); + mi_span_queue_t* sq = mi_span_queue_for(slice->slice_count, tld); + mi_span_queue_delete(sq, slice); } -static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(page->slice_count >= slice_count); - mi_assert_internal(page->block_size > 0); // no more in free queue - if (page->slice_count <= slice_count) return; - mi_segment_t* segment = _mi_page_segment(page); +static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0 && slice->block_size > 0); + mi_segment_t* segment = _mi_ptr_segment(slice); + mi_assert_internal(segment->used > 0); + segment->used--; + + // for huge pages, just mark as free but don't add to the queues + if (segment->kind == MI_SEGMENT_HUGE) { + mi_assert_internal(segment->used == 0); + slice->block_size = 0; // mark as free anyways + return slice; + } + + // otherwise coalesce the span and add to the free span queues + size_t slice_count = slice->slice_count; + mi_slice_t* next = slice + slice->slice_count; + mi_assert_internal(next <= mi_segment_slices_end(segment)); + if (next < mi_segment_slices_end(segment) && next->block_size==0) { + // free next block -- remove it from free and merge + mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); + slice_count += next->slice_count; // extend + mi_segment_span_remove_from_queue(next, tld); + } + if (slice > segment->slices) { + mi_slice_t* prev = mi_slice_first(slice - 1); + mi_assert_internal(prev >= segment->slices); + if (prev->block_size==0) { + // free previous slice -- remove it from free and merge + mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); + slice_count += prev->slice_count; + mi_segment_span_remove_from_queue(prev, tld); + slice = prev; 
+ } + } + + // and add the new free page + mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + return slice; +} + + +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_ptr_segment(slice)==segment); + mi_assert_internal(slice->slice_count >= slice_count); + mi_assert_internal(slice->block_size > 0); // no more in free queue + if (slice->slice_count <= slice_count) return; mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; - size_t next_count = page->slice_count - slice_count; - mi_segment_page_init( segment, next_index, next_count, tld ); - page->slice_count = (uint32_t)slice_count; + size_t next_index = mi_slice_index(slice) + slice_count; + size_t next_count = slice->slice_count - slice_count; + mi_segment_span_free(segment, next_index, next_count, tld); + slice->slice_count = (uint32_t)slice_count; } -static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { + +static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count) { + mi_assert_internal(slice_index < segment->slice_entries); + mi_slice_t* slice = &segment->slices[slice_index]; + mi_assert_internal(slice->block_size==0 || slice->block_size==1); + slice->slice_offset = 0; + slice->slice_count = (uint32_t)slice_count; + mi_assert_internal(slice->slice_count == slice_count); + slice->block_size = slice_count * MI_SEGMENT_SLICE_SIZE; + mi_page_t* page = mi_slice_to_page(slice); + + // set slice back pointers for the first MI_MAX_SLICE_OFFSET entries + size_t extra = slice_count-1; + if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; + if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices + slice++; + for (size_t i = 1; i <= extra; i++, slice++) { + slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); + slice->slice_count = 0; + slice->block_size = 1; + } + + // and also for the last one (if not set already) (the last one is needed for coalescing) + mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + if (last < mi_segment_slices_end(segment) && last >= slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); + last->slice_count = 0; + last->block_size = 1; + } + + segment->used++; + return page; +} + +static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up - mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); + mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld); if (slice_count == 0) slice_count = 1; - while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX]) { - for( mi_page_t* page = pq->first; page != NULL; page = page->next) { - if (page->slice_count >= slice_count) { + while (sq <= &tld->spans[MI_SEGMENT_BIN_MAX]) { + for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) { + if (slice->slice_count >= slice_count) { // found one - mi_page_queue_delete(pq,page); - if (page->slice_count > slice_count) { - mi_segment_page_split(page,slice_count,tld); + mi_span_queue_delete(sq, slice); + mi_segment_t* segment = _mi_ptr_segment(slice); + if (slice->slice_count > 
slice_count) { + mi_segment_slice_split(segment, slice, slice_count, tld); } - mi_assert_internal(page != NULL && page->slice_count == slice_count); - return page; + mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->block_size > 0); + return mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count); } } - pq++; + sq++; } // could not find a page.. return NULL; } -static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { - mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); - mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); - mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); - mi_page_queue_delete(pq, mi_slice_to_page(slice)); -} - /* ----------------------------------------------------------- Segment allocation ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { // calculate needed sizes first - size_t info_size; + size_t info_slices; size_t pre_size; - size_t segment_size = mi_segment_size(required, &pre_size, &info_size); - size_t slice_count = mi_slices_in(segment_size); - if (slice_count > MI_SLICES_PER_SEGMENT) slice_count = MI_SLICES_PER_SEGMENT; - mi_assert_internal(segment_size - _mi_align_up(sizeof(mi_segment_t),MI_SEGMENT_SLICE_SIZE) >= required); - mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); - //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); + size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); + size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices); + size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; // Try to get it from our thread local cache first bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) || required > 0; // huge page - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); if (segment==NULL) { // Allocate the segment from the OS - size_t memid = 0; segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, /* &memid,*/ os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { - _mi_os_commit(segment, info_size, tld->stats); + _mi_os_commit(segment, info_slices*MI_SEGMENT_SLICE_SIZE, tld->stats); } - segment->memid = memid; - mi_segments_track_size((long)segment_size, tld); + mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info - { size_t memid = segment->memid; - memset(segment, 0, info_size); - segment->memid = memid; - } + // zero the segment info? 
-- not needed as it is zero initialized from the OS + // memset(segment, 0, info_size); if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data - mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_os_protect((uint8_t*)segment + info_size, (pre_size - info_size)); size_t os_page_size = _mi_os_page_size(); + size_t info_size = (info_slices * MI_SEGMENT_SLICE_SIZE); + mi_assert_internal(info_size - os_page_size >= pre_size); + _mi_os_protect((uint8_t*)segment + info_size - os_page_size, os_page_size); // and protect the last page too _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); - slice_count--; // don't use the last slice :-( + if (slice_entries == segment_slices) slice_entries--; // don't use the last slice :-( } // initialize segment info - segment->segment_size = segment_size; - segment->segment_info_size = pre_size; + segment->segment_slices = segment_slices; + segment->segment_info_slices = info_slices; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - segment->slice_count = slice_count; - segment->all_committed = commit; + segment->slice_entries = slice_entries; + segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); // reserve first slices for segment info - size_t islice_count = (segment->segment_info_size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; - for (size_t i = 0; i < islice_count; i++) { - mi_slice_t* slice = &segment->slices[i]; - if (i==0) { - slice->slice_count = (uint32_t)islice_count; - slice->block_size = islice_count * MI_SEGMENT_SLICE_SIZE; - } - else { - slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); - slice->block_size = 1; - } - } - + mi_segment_span_allocate(segment,0,info_slices); + mi_assert_internal(segment->used == 1); + segment->used = 0; // don't count our internal slices towards usage + // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page - mi_segment_page_init(segment, islice_count, segment->slice_count - islice_count, tld); + mi_assert_internal(huge_page==NULL); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, tld); } + else { + mi_assert_internal(huge_page!=NULL); + *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices); + } + return segment; } @@ -520,18 +587,18 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(segment->next == NULL); - mi_assert_internal(segment->prev == NULL); mi_assert_internal(segment->used == 0); // Remove the free pages mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); size_t page_count = 0; - while (slice <= mi_segment_last_slice(segment)) { + while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. 
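/* ---------------------------------------------------------------------------
   Illustrative sketch (not the patch's own code; slice_t and count_spans are
   hypothetical simplifications): the loop above walks a segment span by
   span -- the first slice of every span stores how many slices it covers,
   so the walk can jump straight to the next span instead of visiting each
   slice.  The sketch shows that traversal pattern on a plain array.
--------------------------------------------------------------------------- */
#include <stddef.h>
#include <stdint.h>

typedef struct slice_s {
  uint32_t slice_count;   // > 0 on the first slice of a span: span length
  uint32_t block_size;    // 0 means the span is free
} slice_t;

// Count used and free spans in a segment of `entries` slices.
static void count_spans(const slice_t* slices, size_t entries,
                        size_t* used, size_t* free_spans) {
  *used = 0; *free_spans = 0;
  size_t i = 0;
  while (i < entries) {
    const slice_t* s = &slices[i];
    if (s->block_size == 0) (*free_spans)++; else (*used)++;
    i += (s->slice_count > 0 ? s->slice_count : 1);   // jump to the next span
  }
}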
if (slice->block_size == 0 && segment->kind != MI_SEGMENT_HUGE) { - mi_segment_page_delete(slice, tld); + mi_segment_span_remove_from_queue(slice, tld); } page_count++; slice = slice + slice->slice_count; @@ -539,7 +606,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_internal(page_count == 2); // first page is allocated by the segment itself // stats - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + _mi_stat_decrease(&tld->stats->page_committed, mi_segment_info_size(segment)); if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache @@ -554,83 +621,24 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t Page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE); // find a free page size_t page_size = _mi_align_up(required,(required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; - mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again - if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM - return mi_segment_page_alloc(page_kind, required, tld, os_tld); + if (mi_segment_alloc(0, tld, os_tld, NULL) == NULL) return NULL; // OOM + return mi_segments_page_alloc(page_kind, required, tld, os_tld); } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); - - // set slice back pointers and commit/unreset - mi_segment_t* segment = _mi_page_segment(page); - mi_slice_t* slice = mi_page_to_slice(page); - bool commit = false; - bool unreset = false; - for (size_t i = 0; i < page->slice_count; i++, slice++) { - slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); - slice->block_size = 1; - if (i > 0) slice->slice_count = 0; - if (!segment->all_committed && !slice->is_committed) { - slice->is_committed = true; - commit = true; - } - if (slice->is_reset) { - slice->is_reset = false; - unreset = true; - } - } - uint8_t* page_start = mi_slice_start(mi_page_to_slice(page)); - if(commit) { _mi_os_commit(page_start, page_size, tld->stats); } - if(unreset){ _mi_os_unreset(page_start, page_size, tld->stats); } - - // initialize the page and return - mi_assert_internal(segment->thread_id == _mi_thread_id()); - segment->used++; + mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); return page; } -static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(page != NULL && page->slice_count > 0 && page->slice_offset == 0 && page->block_size > 0); - mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(segment->used > 0); - segment->used--; - - // free and coalesce the page - mi_slice_t* slice = mi_page_to_slice(page); - size_t slice_count = slice->slice_count; - mi_slice_t* next = slice + slice->slice_count; - mi_assert_internal(next <= 
mi_segment_last_slice(segment) + 1); - if (next <= mi_segment_last_slice(segment) && next->block_size==0) { - // free next block -- remove it from free and merge - mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); - slice_count += next->slice_count; // extend - mi_segment_page_delete(next, tld); - } - if (slice > segment->slices) { - mi_slice_t* prev = mi_slice_first(slice - 1); - mi_assert_internal(prev >= segment->slices); - if (prev->block_size==0) { - // free previous slice -- remove it from free and merge - mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); - slice_count += prev->slice_count; - mi_segment_page_delete(prev, tld); - slice = prev; - } - } - - // and add the new free page - mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); - return slice; -} /* ----------------------------------------------------------- @@ -643,7 +651,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld mi_assert_internal(page->block_size > 0); mi_assert_internal(mi_page_all_free(page)); mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment->all_committed || page->is_committed); + size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -667,20 +675,13 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld page->block_size = 1; // and free it - if (segment->kind != MI_SEGMENT_HUGE) { - return mi_segment_page_free_coalesce(page, tld); - } - else { - mi_assert_internal(segment->used == 1); - segment->used--; - page->block_size = 0; // pretend free - return mi_page_to_slice(page); - } + return mi_segment_span_free_coalesce(mi_page_to_slice(page), tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) { mi_assert(page != NULL); + mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment,tld)); @@ -717,11 +718,12 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the free pages from our lists mi_slice_t* slice = &segment->slices[0]; - while (slice <= mi_segment_last_slice(segment)) { + const mi_slice_t* end = mi_segment_slices_end(segment); + while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); if (slice->block_size == 0) { // a free page - mi_segment_page_delete(slice,tld); + mi_segment_span_remove_from_queue(slice,tld); slice->block_size = 0; // but keep it free } slice = slice + slice->slice_count; @@ -729,8 +731,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); - mi_segments_track_size(-((long)segment->segment_size), tld); - + mi_segments_track_size(-((long)mi_segment_size(segment)), tld); segment->thread_id = 0; mi_segment_t* next; do { @@ -778,19 +779,19 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned_next = NULL; segment->thread_id = _mi_thread_id(); - mi_segments_track_size((long)segment->segment_size,tld); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_segments_track_size((long)mi_segment_size(segment),tld); + mi_assert_internal(segment->next == NULL); 
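/* ---------------------------------------------------------------------------
   Illustrative sketch (not the patch's own code; node_t and the function
   names are hypothetical): abandoned segments are kept on a global lock-free
   list that other threads can reclaim from, as the surrounding code does.
   The sketch shows the underlying compare-and-swap push/pop (a Treiber
   stack) using C11 atomics; a production pop would also need ABA protection
   (e.g. a tag counter), which is omitted here.
--------------------------------------------------------------------------- */
#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s {
  struct node_s* next;
} node_t;

static _Atomic(node_t*) abandoned_list = NULL;

// Push: retry until the head we read is still the current head.
static void abandoned_push(node_t* n) {
  node_t* head = atomic_load_explicit(&abandoned_list, memory_order_relaxed);
  do {
    n->next = head;
  } while (!atomic_compare_exchange_weak_explicit(
             &abandoned_list, &head, n,
             memory_order_release, memory_order_relaxed));
}

// Pop one node, or NULL if the list is empty.
static node_t* abandoned_pop(void) {
  node_t* head = atomic_load_explicit(&abandoned_list, memory_order_acquire);
  while (head != NULL &&
         !atomic_compare_exchange_weak_explicit(
            &abandoned_list, &head, head->next,
            memory_order_acquire, memory_order_relaxed)) {
    // CAS failed: `head` was reloaded, try again
  }
  return head;
}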
_mi_stat_decrease(&tld->stats->segments_abandoned,1); mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page slice = slice + slice->slice_count; // skip the first segment allocated page - while (slice <= mi_segment_last_slice(segment)) { + while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); - mi_page_t* page = mi_slice_to_page(slice); - if (page->block_size == 0) { // a free page, add it to our lists - mi_segment_page_add_free(page,tld); + if (slice->block_size == 0) { // a free page, add it to our lists + mi_segment_span_add_free(slice,tld); } slice = slice + slice->slice_count; } @@ -798,7 +799,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen slice = &segment->slices[0]; mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page slice = slice + slice->slice_count; // skip the first segment allocated page - while (slice <= mi_segment_last_slice(segment)) { + while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); mi_page_t* page = mi_slice_to_page(slice); @@ -837,27 +838,11 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld); - if (segment == NULL) return NULL; - mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); - segment->used = 1; - - mi_page_t* page = mi_slice_to_page(&segment->slices[0]); - mi_assert_internal(page->block_size > 0 && page->slice_count > 0); - size_t initial_count = page->slice_count; - page = page + initial_count; - page->slice_count = (uint32_t)((segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE); - page->slice_offset = 0; - page->block_size = size; - mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); - mi_assert_internal(page->slice_count >= segment->slice_count - initial_count); - // set back pointers - for (size_t i = 1; i slice_count; i++) { - mi_slice_t* slice = (mi_slice_t*)(page + i); - slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); - slice->block_size = 1; - slice->slice_count = 0; - } + mi_page_t* page = NULL; + mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); + if (segment == NULL || page==NULL) return NULL; + mi_assert_internal(segment->used==1); + mi_assert_internal(page->block_size >= size); return page; } @@ -874,13 +859,13 @@ static bool mi_is_good_fit(size_t bsize, size_t size) { mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {// || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { - page = mi_segment_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld); + page = mi_segments_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) {// || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { - page = mi_segment_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld); + page = mi_segments_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld); } else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { - page = mi_segment_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld); + page = 
mi_segments_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld); } else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); @@ -894,12 +879,12 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ The following functions are to reliably find the segment or block that encompasses any pointer p (or NULL if it is not in any of our segments). - We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (128mb) + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) set to 1 if it contains the segment meta data. ----------------------------------------------------------- */ #if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)1 << 44) // 16TB +#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB #else #define MI_MAX_ADDRESS ((size_t)1 << 31) // 2Gb #endif @@ -908,10 +893,10 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) -static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 1KiB per TB with 128MiB segments +static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB? + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; *bitidx = segindex % (8*MI_INTPTR_SIZE); return (segindex / (8*MI_INTPTR_SIZE)); @@ -948,13 +933,14 @@ static mi_segment_t* _mi_segment_of(const void* p) { mi_segment_t* segment = _mi_ptr_segment(p); size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); - // fast path: for any pointer to valid small/medium/large object or first 4MiB in huge + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge if (mi_likely((mi_segment_map[index] & ((uintptr_t)1 << bitidx)) != 0)) { return segment; // yes, allocated by us } if (index==0) return NULL; // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in 256MiB steps trough valid huge objects + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (4GiB) steps trough + // valid huge objects // note: we could maintain a lowest index to speed up the path for invalid pointers? 
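/* ---------------------------------------------------------------------------
   Illustrative sketch (not the patch's own code; SEG_SHIFT, MAX_ADDRESS and
   the map_* names are hypothetical, and a 64-bit address space is assumed):
   the segment map above keeps one bit per MI_SEGMENT_SIZE-aligned address
   range so a pointer can be checked for "belongs to one of our segments".
   The sketch shows the word/bit index arithmetic for such a bitmap; the
   real code updates the map word atomically.
--------------------------------------------------------------------------- */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define SEG_SHIFT    26                                   // 64 MiB segments
#define SEG_SIZE     ((uintptr_t)1 << SEG_SHIFT)
#define MAX_ADDRESS  ((uintptr_t)20 << 40)                // cover 20 TiB
#define MAP_WORDS    ((MAX_ADDRESS / SEG_SIZE) / (8 * sizeof(uintptr_t)))

static uintptr_t segment_map[MAP_WORDS];                  // 1 bit per segment

// Locate the map word and bit for the segment that contains `p`.
static size_t map_index_of(const void* p, size_t* bitidx) {
  uintptr_t segidx = ((uintptr_t)p % MAX_ADDRESS) >> SEG_SHIFT;
  *bitidx = segidx % (8 * sizeof(uintptr_t));
  return segidx / (8 * sizeof(uintptr_t));
}

// Mark a segment's start address as allocated by us.
static void map_mark_allocated(const void* segment_start) {
  size_t bitidx;
  size_t idx = map_index_of(segment_start, &bitidx);
  segment_map[idx] |= ((uintptr_t)1 << bitidx);
}

// Fast check whether `p` falls in a segment we allocated.
static bool map_contains(const void* p) {
  size_t bitidx;
  size_t idx = map_index_of(p, &bitidx);
  return (segment_map[idx] & ((uintptr_t)1 << bitidx)) != 0;
}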
size_t lobitidx; size_t loindex; @@ -978,8 +964,8 @@ static mi_segment_t* _mi_segment_of(const void* p) { bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(cookie_ok); if (mi_unlikely(!cookie_ok)) return NULL; - if (((uint8_t*)segment + segment->segment_size) <= (uint8_t*)p) return NULL; // outside the range - mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + segment->segment_size); + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); return segment; } diff --git a/test/test-stress.c b/test/test-stress.c index ad487538..a4f223e2 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -63,7 +63,11 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1,r)) items *= 1000; // 0.01% giant + else if (chance(10,r)) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; From 6f5492cef801badbfb8cb7a2acfdbd5295590f22 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 24 Aug 2019 15:00:55 -0700 Subject: [PATCH 009/352] enable initial lazy commit and optional decommit to reduce commit charge with many threads --- include/mimalloc-internal.h | 5 ++ include/mimalloc-types.h | 6 +- include/mimalloc.h | 6 +- src/options.c | 6 +- src/os.c | 14 ++-- src/segment.c | 134 +++++++++++++++++++++++++++--------- 6 files changed, 125 insertions(+), 46 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bf5c2e04..ce9f6d07 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -191,6 +191,11 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { } } +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + return (sz / alignment) * alignment; +} + + // Align a byte size to a size in _machine words_, // i.e. byte size == `wsize*sizeof(void*)`. static inline size_t _mi_wsize_from_size(size_t size) { diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 78b643ad..81ac9d54 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -84,7 +84,7 @@ terms of the MIT license. A copy of the license can be found in the file // Derived constants #define MI_SEGMENT_SIZE ((size_t)1< MI_LARGE_SIZE_MAX segment with just one huge page inside. 
} mi_segment_kind_t; -#define MI_COMMIT_SIZE ((size_t)2 << 20) // OS large page size +#define MI_COMMIT_SIZE (2UL<<20) // OS large page size -#if ((MI_SEGMENT_SIZE / MI_COMMIT_SIZE) > MI_INTPTR_SIZE) +#if ((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE > 8*MI_INTPTR_SIZE) #error "not enough commit bits to cover the segment size" #endif diff --git a/include/mimalloc.h b/include/mimalloc.h index 7000cd42..c1a3bbe6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -225,9 +225,9 @@ typedef enum mi_option_e { mi_option_verbose, // the following options are experimental mi_option_secure, - mi_option_eager_commit, - mi_option_eager_region_commit, - mi_option_large_os_pages, // implies eager commit + mi_option_lazy_commit, + mi_option_decommit, + mi_option_large_os_pages, mi_option_reserve_huge_os_pages, mi_option_page_reset, mi_option_cache_reset, diff --git a/src/options.c b/src/options.c index ff65c3f5..c56b6bd7 100644 --- a/src/options.c +++ b/src/options.c @@ -55,12 +55,12 @@ static mi_option_desc_t options[_mi_option_last] = #endif // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // note: if eager_region_commit is on, this should be on too. #ifdef _WIN32 // and BSD? - { 1, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) + { 1, UNINIT, MI_OPTION(lazy_commit) }, #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 0, UNINIT, MI_OPTION(lazy_commit) }, #endif + { 0, UNINIT, MI_OPTION(decommit) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(page_reset) }, diff --git a/src/os.c b/src/os.c index a1b6cdf3..9ad595d5 100644 --- a/src/os.c +++ b/src/os.c @@ -44,10 +44,6 @@ static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); } -static uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { - return (sz / alignment) * alignment; -} - static void* mi_align_down_ptr(void* p, size_t alignment) { return (void*)_mi_align_down((uintptr_t)p, alignment); } @@ -195,10 +191,14 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats) } #ifdef _WIN32 + +#define MEM_COMMIT_RESERVE (MEM_COMMIT|MEM_RESERVE) + static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if ((size % (uintptr_t)1 << 30) == 0 /* 1GiB multiple */ + if ((flags&MEM_COMMIT_RESERVE)==MEM_COMMIT_RESERVE + && (size % (uintptr_t)1 << 30) == 0 /* 1GiB multiple */ && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0) && pNtAllocateVirtualMemoryEx != NULL) @@ -250,7 +250,9 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only) { static volatile uintptr_t large_page_try_ok = 0; void* p = NULL; - if (large_only || use_large_os_page(size, try_alignment)) { + if ((flags&MEM_COMMIT_RESERVE) == MEM_COMMIT_RESERVE + && (large_only || use_large_os_page(size, try_alignment))) + { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the 
calls to VirtualAlloc get very expensive. diff --git a/src/segment.c b/src/segment.c index 3b3272a1..54de294f 100644 --- a/src/segment.c +++ b/src/segment.c @@ -39,7 +39,7 @@ static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) { return &segment->slices[segment->slice_entries]; } -/* + static uint8_t* mi_slice_start(const mi_slice_t* slice) { mi_segment_t* segment = _mi_ptr_segment(slice); mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment)); @@ -47,11 +47,6 @@ static uint8_t* mi_slice_start(const mi_slice_t* slice) { } -static size_t mi_slices_in(size_t size) { - return (size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; -} -*/ - /* ----------------------------------------------------------- Bins ----------------------------------------------------------- */ @@ -359,6 +354,68 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Span management ----------------------------------------------------------- */ +static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size) { + mi_assert_internal(_mi_ptr_segment(p) == segment); + if (size == 0 || size > MI_SEGMENT_SIZE) return 0; + if (p >= (uint8_t*)segment + mi_segment_size(segment)) return 0; + + uintptr_t diff = (p - (uint8_t*)segment); + uintptr_t start; + uintptr_t end; + if (conservative) { + start = _mi_align_up(diff, MI_COMMIT_SIZE); + end = _mi_align_down(diff + size, MI_COMMIT_SIZE); + } + else { + start = _mi_align_down(diff, MI_COMMIT_SIZE); + end = _mi_align_up(diff + size, MI_COMMIT_SIZE); + } + mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); + *start_p = (uint8_t*)segment + start; + *full_size = (end > start ? 
end - start : 0); + + uintptr_t bitidx = start / MI_COMMIT_SIZE; + mi_assert_internal(bitidx < (MI_INTPTR_SIZE*8)); + + uintptr_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 + if (bitidx + bitcount > MI_INTPTR_SIZE*8) { + _mi_warning_message("%zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); + } + mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); + + uintptr_t mask = (((uintptr_t)1 << bitcount) - 1) << bitidx; + + return mask; +} + +static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { + // commit liberal, but decommit conservative + uint8_t* start; + size_t full_size; + uintptr_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); + if (mask==0 || full_size==0) return; + + if (commit && (segment->commit_mask & mask) != mask) { + _mi_os_commit(start,full_size,stats); + segment->commit_mask |= mask; + } + else if (!commit && (segment->commit_mask & mask) != 0) { + _mi_os_decommit(start, full_size,stats); + segment->commit_mask &= ~mask; + } +} + +static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (~segment->commit_mask == 0) return; // fully committed + mi_segment_commitx(segment,true,p,size,stats); +} + +static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (!segment->allow_decommit || !mi_option_is_enabled(mi_option_decommit)) return; + if (segment->commit_mask == 1) return; // fully decommitted + mi_segment_commitx(segment, false, p, size, stats); +} + static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE ? 
NULL : mi_span_queue_for(slice_count,tld)); @@ -376,6 +433,10 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); last->block_size = 0; } + + // perhaps decommit + mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); + // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); else slice->block_size = 0; // mark huge page as free anyways @@ -452,7 +513,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz } -static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count) { +static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_slice_t* slice = &segment->slices[slice_index]; mi_assert_internal(slice->block_size==0 || slice->block_size==1); @@ -481,6 +542,8 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i last->block_size = 1; } + // ensure the memory is committed + mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); segment->used++; return page; } @@ -500,7 +563,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm mi_segment_slice_split(segment, slice, slice_count, tld); } mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->block_size > 0); - return mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count); + return mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); } } sq++; @@ -524,49 +587,58 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? 
MI_SLICES_PER_SEGMENT : segment_slices); size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; - // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) - || required > 0; // huge page + // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) + size_t lazy = (size_t)mi_option_get(mi_option_lazy_commit); + bool commit_lazy = (lazy > tld->count) && required == 0; // lazy, and not a huge page + + // Try to get from our cache first mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); if (segment==NULL) { // Allocate the segment from the OS - segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, /* &memid,*/ os_tld); + segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, !commit_lazy, /* &memid,*/ os_tld); if (segment == NULL) return NULL; // failed to allocate - if (!commit) { - _mi_os_commit(segment, info_slices*MI_SEGMENT_SLICE_SIZE, tld->stats); + mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); + if (commit_lazy) { + // at least commit the info slices + mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); + _mi_os_commit(segment, MI_COMMIT_SIZE, tld->stats); } mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } - mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); // zero the segment info? -- not needed as it is zero initialized from the OS // memset(segment, 0, info_size); - if (mi_option_is_enabled(mi_option_secure)) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - size_t os_page_size = _mi_os_page_size(); - size_t info_size = (info_slices * MI_SEGMENT_SLICE_SIZE); - mi_assert_internal(info_size - os_page_size >= pre_size); - _mi_os_protect((uint8_t*)segment + info_size - os_page_size, os_page_size); - // and protect the last page too - _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); - if (slice_entries == segment_slices) slice_entries--; // don't use the last slice :-( - } - + // initialize segment info + memset(segment,0,offsetof(mi_segment_t,slices)); segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); segment->slice_entries = slice_entries; - segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); + segment->allow_decommit = commit_lazy; + segment->commit_mask = (commit_lazy ? 
0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed + memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); + // set up guard pages + if (mi_option_is_enabled(mi_option_secure)) { + // in secure mode, we set up a protected page in between the segment info + // and the page data + size_t os_page_size = _mi_os_page_size(); + mi_assert_internal(mi_segment_info_size(segment) - os_page_size >= pre_size); + _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_page_size, os_page_size); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_page_size; + mi_segment_ensure_committed(segment, end, os_page_size, tld->stats); + _mi_os_protect(end, os_page_size); + if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( + } + // reserve first slices for segment info - mi_segment_span_allocate(segment,0,info_slices); + mi_segment_span_allocate(segment, 0, info_slices, tld); mi_assert_internal(segment->used == 1); segment->used = 0; // don't count our internal slices towards usage @@ -577,7 +649,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m } else { mi_assert_internal(huge_page!=NULL); - *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices); + *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld); } return segment; @@ -886,7 +958,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ #if (MI_INTPTR_SIZE==8) #define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB #else -#define MI_MAX_ADDRESS ((size_t)1 << 31) // 2Gb +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb #endif #define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) From 80a36f1d7cdfdd2371213720d8312b4323b2f83a Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 24 Aug 2019 17:02:32 -0700 Subject: [PATCH 010/352] reduce page retire words to 32 --- src/page.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index 313bb66a..68312f81 100644 --- a/src/page.c +++ b/src/page.c @@ -387,7 +387,7 @@ void _mi_page_retire(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_all_free(page)); - + mi_page_set_has_aligned(page, false); // don't retire too often.. @@ -396,13 +396,13 @@ void _mi_page_retire(mi_page_t* page) { // is the only page left with free blocks. It is not clear // how to check this efficiently though... for now we just check // if its neighbours are almost fully used. - if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(page->block_size <= 32*MI_INTPTR_SIZE)) { if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1); return; // dont't retire after all } } - + _mi_page_free(page, mi_page_queue_of(page), false); } From c7ec30ae25178e7386fa3e202557a9f14a0ffbc0 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 30 Oct 2019 15:36:13 -0700 Subject: [PATCH 011/352] fix secure mode --- include/mimalloc-types.h | 2 +- src/segment.c | 4 +++- test/test-stress.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 91a68247..b77d77d3 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode diff --git a/src/segment.c b/src/segment.c index b8db8460..7dcfcd36 100644 --- a/src/segment.c +++ b/src/segment.c @@ -226,12 +226,13 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa mi_assert_internal((uintptr_t)p % _mi_os_page_size() == 0); } */ - + /* TODO: guard pages between every slice span if (MI_SECURE > 1 || (MI_SECURE == 1 && slice == &segment->slices[segment->slice_entries - 1])) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page psize -= _mi_os_page_size(); } + */ if (page_size != NULL) *page_size = psize; mi_assert_internal(_mi_ptr_page(p) == page); @@ -708,6 +709,7 @@ static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t requir // find a free page size_t page_size = _mi_align_up(required,(required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; + mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again diff --git a/test/test-stress.c b/test/test-stress.c index e3b0f7a3..08406ec7 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -17,7 +17,7 @@ terms of the MIT license. #include // argument defaults -static int THREADS = 1; // more repeatable if THREADS <= #processors +static int THREADS = 32; // more repeatable if THREADS <= #processors static int N = 20; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors From f7d2c45af3700aa045d99e76b98139099aa4691e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 00:40:41 -0700 Subject: [PATCH 012/352] initial experiment with fixed memory arena and sliced segments --- CMakeLists.txt | 1 + ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj | 1 + include/mimalloc-internal.h | 6 + include/mimalloc-types.h | 8 +- src/memory.c | 551 ------------------- src/os.c | 4 +- src/segment.c | 21 +- test/test-stress.c | 2 +- 13 files changed, 39 insertions(+), 564 deletions(-) delete mode 100644 src/memory.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 84668eb3..f8836f20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c + src/arena.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 35f84a4b..458d5e70 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index f7ea6d4c..64bb3dbd 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -67,5 +67,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj 
b/ide/vs2017/mimalloc.vcxproj index 415f87fc..219449c9 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 0e5512bc..87f7e9e1 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -50,6 +50,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 3a9cdcae..ac559468 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 2af40f16..f38a7a11 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f4822b10..f5b11c33 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -65,6 +65,12 @@ bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + +// arena.c +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); /* // memory.c diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b77d77d3..b043bebe 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -239,12 +239,14 @@ typedef mi_page_t mi_slice_t; // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - struct mi_segment_s* next; // the list of freed segments in the cache - volatile _Atomic(struct mi_segment_s*) abandoned_next; - + size_t memid; // memory id for arena allocation bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed + // from here is zero initialized + struct mi_segment_s* next; // the list of freed segments in the cache + volatile _Atomic(struct mi_segment_s*) abandoned_next; + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t used; // count of pages in use uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` diff --git a/src/memory.c b/src/memory.c deleted file mode 100644 index 80351edc..00000000 --- a/src/memory.c +++ /dev/null @@ -1,551 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses in that - object :-( (`malloc-large` tests this). This means we need a cheaper way to - reuse memory. -3. This layer can help with a NUMA aware allocation in the future. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset - -// Internal raw OS interface -size_t _mi_os_large_page_size(); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -bool _mi_os_is_huge_reserved(void* p); - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE - -#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8) -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS) -#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) -#define MI_REGION_MAP_FULL UINTPTR_MAX - - -typedef uintptr_t mi_region_info_t; - -static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { - return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); -} - 
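/* ---------------------------------------------------------------------------
   Illustrative sketch (not the patch's own code; region_info_pack/unpack are
   hypothetical names): the removed region code around this hunk packs a
   start pointer and two flag bits into a single word so the whole triple can
   be read and written with one atomic access.  This relies on the pointer
   being aligned to at least 4 bytes, which leaves the low two bits free.
--------------------------------------------------------------------------- */
#include <stdbool.h>
#include <stdint.h>

typedef uintptr_t region_info_t;

static region_info_t region_info_pack(void* start, bool is_large, bool is_committed) {
  // the low two bits carry the flags; `start` must be at least 4-byte aligned
  return ((uintptr_t)start
          | ((uintptr_t)(is_large ? 1 : 0) << 1)
          | (uintptr_t)(is_committed ? 1 : 0));
}

static void* region_info_unpack(region_info_t info, bool* is_large, bool* is_committed) {
  if (is_large)     *is_large     = ((info & 0x02) != 0);
  if (is_committed) *is_committed = ((info & 0x01) != 0);
  return (void*)(info & ~(uintptr_t)0x03);
}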
-static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { - if (is_large) *is_large = ((info&0x02) != 0); - if (is_committed) *is_committed = ((info&0x01) != 0); - return (void*)(info & ~0x03); -} - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block - volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags - volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd -} mem_region_t; - - -// The region map; 16KiB for a 256GiB HEAP_REGION_MAX -// TODO: in the future, maintain a map per NUMA node for numa aware allocation -static mem_region_t regions[MI_REGION_MAX]; - -static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); - return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; -} - -// The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) { - mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS); - return ((((uintptr_t)1 << blocks) - 1) << bitidx); -} - -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} - -// Return if a pointer points into a region reserved by us. -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_read_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(®ions[i].info), NULL, NULL); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -/* ---------------------------------------------------------------------------- -Commit from a region ------------------------------------------------------------------------------*/ - -// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, - size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - size_t mask = mi_region_block_mask(blocks,bitidx); - mi_assert_internal(mask != 0); - mi_assert_internal((mask & mi_atomic_read_relaxed(®ion->map)) == mask); - mi_assert_internal(®ions[idx] == region); - - // ensure the region is reserved - mi_region_info_t info = mi_atomic_read(®ion->info); - if (info == 0) - { - bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = *allow_large; - void* start = NULL; - if (region_large) { - start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); - if (start != NULL) { region_commit = true; } - } - if (start == NULL) { - start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); - } - mi_assert_internal(!(region_large && !*allow_large)); - - if (start == NULL) { - // failure to allocate from the OS! unclaim the blocks and fail - size_t map; - do { - map = mi_atomic_read_relaxed(®ion->map); - } while (!mi_atomic_cas_weak(®ion->map, map & ~mask, map)); - return false; - } - - // set the newly allocated region - info = mi_region_info_create(start,region_large,region_commit); - if (mi_atomic_cas_strong(®ion->info, info, 0)) { - // update the region count - mi_atomic_increment(®ions_count); - } - else { - // failed, another thread allocated just before us! - // we assign it to a later slot instead (up to 4 tries). - for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { - if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { - mi_atomic_increment(®ions_count); - start = NULL; - break; - } - } - if (start != NULL) { - // free it if we didn't succeed to save it to some other region - _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); - } - // and continue with the memory at our index - info = mi_atomic_read(®ion->info); - } - } - mi_assert_internal(info == mi_atomic_read(®ion->info)); - mi_assert_internal(info != 0); - - // Commit the blocks to memory - bool region_is_committed = false; - bool region_is_large = false; - void* start = mi_region_info_read(info,®ion_is_large,®ion_is_committed); - mi_assert_internal(!(region_is_large && !*allow_large)); - mi_assert_internal(start!=NULL); - - // set dirty bits - uintptr_t m; - do { - m = mi_atomic_read(®ion->dirty_mask); - } while (!mi_atomic_cas_weak(®ion->dirty_mask, m | mask, m)); - *is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range? 
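// Note (sketch): the do/while above is an atomic fetch-or implemented as a CAS loop; `m` holds the
// value of `dirty_mask` from just before our bits were set, which is what makes the `is_zero` test
// valid. With C11 atomics the same step could be written roughly as (assuming the field is usable
// as a plain C11 atomic on this platform):
//
//   uintptr_t m = atomic_fetch_or(&region->dirty_mask, mask);
//   *is_zero = ((m & mask) == 0);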
- - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed) { - // ensure commit - bool commit_zero = false; - _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages) - if (commit_zero) *is_zero = true; - } - else if (!*commit && region_is_committed) { - // but even when no commit is requested, we might have committed anyway (in a huge OS page for example) - *commit = true; - } - - // and return the allocation - mi_assert_internal(blocks_start != NULL); - *allow_large = region_is_large; - *p = blocks_start; - *id = (idx*MI_REGION_MAP_BITS) + bitidx; - return true; -} - -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanForward64(&idx, x); - #else - _BitScanForward(&idx, x); - #endif - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanReverse64(&idx, x); - #else - _BitScanReverse(&idx, x); - #endif - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); -} -#endif - -// Allocate `blocks` in a `region` at `idx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(p != NULL && id != NULL); - mi_assert_internal(blocks < MI_REGION_MAP_BITS); - - const uintptr_t mask = mi_region_block_mask(blocks, 0); - const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; - uintptr_t map = mi_atomic_read(®ion->map); - if (map==MI_REGION_MAP_FULL) return true; - - #ifdef MI_HAVE_BITSCAN - size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible - #else - size_t bitidx = 0; // otherwise start at 0 - #endif - uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx - - // scan linearly for a free range of zero bits - while(bitidx <= bitidx_max) { - if ((map & m) == 0) { // are the mask bits free at bitidx? - mi_assert_internal((m >> bitidx) == mask); // no overflow? - uintptr_t newmap = map | m; - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(®ion->map, newmap, map)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going - map = mi_atomic_read(®ion->map); - continue; - } - else { - // success, we claimed the bits - // now commit the block memory -- this can still fail - return mi_region_commit_blocks(region, idx, bitidx, blocks, - size, commit, allow_large, is_zero, p, id, tld); - } - } - else { - // on to the next bit range - #ifdef MI_HAVE_BITSCAN - size_t shift = (blocks == 1 ? 
1 : mi_bsr(map & m) - bitidx + 1); - mi_assert_internal(shift > 0 && shift <= blocks); - #else - size_t shift = 1; - #endif - bitidx += shift; - m <<= shift; - } - } - // no error, but also no bits found - return true; -} - -// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - // check if there are available blocks in the region.. - mi_assert_internal(idx < MI_REGION_MAX); - mem_region_t* region = ®ions[idx]; - uintptr_t m = mi_atomic_read_relaxed(®ion->map); - if (m != MI_REGION_MAP_FULL) { // some bits are zero - bool ok = (*commit || *allow_large); // committing or allow-large is always ok - if (!ok) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info,&is_large,&is_committed); - ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? - } - if (ok) { - return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } - } - return true; // no error, but no success either -} - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. -// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, - size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(id != NULL && tld != NULL); - mi_assert_internal(size > 0); - *id = SIZE_MAX; - *is_zero = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - - // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) - if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - *is_zero = true; - return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size - } - - // always round size to OS page size multiple (so commit/decommit go over the entire range) - // TODO: use large OS page size here? - size = _mi_align_up(size, _mi_os_page_size()); - - // calculate the number of needed blocks - size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); - - // find a range of free blocks - void* p = NULL; - size_t count = mi_atomic_read(®ions_count); - size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? 
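// The search below is first-fit over the regions allocated so far: start at the per-thread
// `region_idx` (to reduce contention between threads), wrap around once over the `count`
// existing regions, then try to extend into at most 8 fresh region slots, and only after
// that fall back to a direct OS allocation.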
- for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - - if (p == NULL) { - // no free range in existing regions -- try to extend beyond the count.. but at most 8 regions - for (idx = count; idx < mi_atomic_read_relaxed(®ions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - } - - if (p == NULL) { - // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - *is_zero = true; - p = _mi_os_alloc_aligned(size, alignment, commit, large, tld); - } - else { - tld->region_idx = idx; // next start of search? currently not used as we use first-fit - } - - mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); - if (p==NULL) return; - if (size==0) return; - if (id == SIZE_MAX) { - // was a direct OS allocation, pass through - _mi_os_free(p, size, stats); - } - else { - // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; - // we can align the size up to page size (as we allocate that way too) - // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - size_t idx = (id / MI_REGION_MAP_BITS); - size_t bitidx = (id % MI_REGION_MAP_BITS); - size_t blocks = mi_region_block_count(size); - size_t mask = mi_region_block_mask(blocks, bitidx); - mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? - mem_region_t* region = ®ions[idx]; - mi_assert_internal((mi_atomic_read_relaxed(®ion->map) & mask) == mask ); // claimed? - mi_region_info_t info = mi_atomic_read(®ion->info); - bool is_large; - bool is_eager_committed; - void* start = mi_region_info_read(info,&is_large,&is_eager_committed); - mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); - if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? - - // decommit (or reset) the blocks to reduce the working set. - // TODO: implement delayed decommit/reset as these calls are too expensive - // if the memory is reused soon. - // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large) { - if (mi_option_is_enabled(mi_option_segment_reset)) { - if (!is_eager_committed && // cannot reset large pages - (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead - mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments - { - _mi_os_reset(p, size, stats); - //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? 
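// (Note on the todo above: a decommit would justify clearing the dirty bits, since on most
//  platforms decommitted pages read back as zero once they are committed again, whereas a
//  plain reset gives no such guarantee, so after a reset the dirty bits must stay set.)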
- } - } -<<<<<<< HEAD - // else { _mi_os_reset(p,size,stats); } - } -======= - } ->>>>>>> dev - if (!is_eager_committed) { - // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); - } - - // TODO: should we free empty regions? currently only done _mi_mem_collect. - // this frees up virtual address space which might be useful on 32-bit systems? - - // and unclaim - uintptr_t map; - uintptr_t newmap; - do { - map = mi_atomic_read_relaxed(®ion->map); - newmap = map & ~mask; - } while (!mi_atomic_cas_weak(®ion->map, newmap, map)); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_stats_t* stats) { - // free every region that has no segments in use. - for (size_t i = 0; i < regions_count; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_read_relaxed(®ion->map) == 0) { - // if no segments used, try to claim the whole region - uintptr_t m; - do { - m = mi_atomic_read_relaxed(®ion->map); - } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); - if (m == 0) { - // on success, free the whole region (unless it was huge reserved) - bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); - if (start != NULL && !_mi_os_is_huge_reserved(start)) { - _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats); - } - // and release - mi_atomic_write(®ion->info,0); - mi_atomic_write(®ion->map,0); - } - } - } -} - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_commit(p, size, is_zero, stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_decommit(p, size, stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_reset(p, size, stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_unreset(p, size, is_zero, stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} diff --git a/src/os.c b/src/os.c index 5e595f93..191be56c 100644 --- a/src/os.c +++ b/src/os.c @@ -868,13 +868,13 @@ static void mi_os_free_huge_reserved() { */ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { UNUSED(pages); UNUSED(max_secs); if (pages_reserved != NULL) *pages_reserved = 0; return ENOMEM; } #else -int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept { if (pages_reserved != NULL) *pages_reserved = 0; if (max_secs==0) return ETIMEDOUT; // timeout diff --git a/src/segment.c b/src/segment.c index 7dcfcd36..8a02acac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -284,7 +284,8 @@ static void mi_segment_os_free(mi_segment_t* segment, 
mi_segments_tld_t* tld) { if (MI_SECURE>0) { _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } - _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); + // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, tld->stats); } @@ -598,29 +599,35 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m // Try to get from our cache first mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); + bool is_zero = false; if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld); + size_t memid = 0; + // segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld); + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); if (!commit) { // at least commit the info slices mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); - bool is_zero = false; _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); } + segment->memid = memid; segment->mem_is_fixed = mem_large; segment->mem_is_committed = commit; mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } - // zero the segment info? -- not needed as it is zero initialized from the OS - // memset(segment, 0, info_size); + // zero the segment info? -- not always needed as it is zero initialized from the OS + if (!is_zero) { + ptrdiff_t ofs = offsetof(mi_segment_t, next); + size_t prefix = offsetof(mi_segment_t, slices) - ofs; + memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); + } // initialize segment info - memset(segment,0,offsetof(mi_segment_t,slices)); segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; segment->thread_id = _mi_thread_id(); @@ -629,7 +636,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); segment->allow_decommit = !commit; segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed - memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); + // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); // set up guard pages diff --git a/test/test-stress.c b/test/test-stress.c index 08406ec7..f60cda10 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,7 @@ terms of the MIT license. 
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 40; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From 28cb19148c112cbfbcf3a768a18b3b4cb5a3301c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 09:10:58 -0700 Subject: [PATCH 013/352] fixed memory arena allocation for huge pages --- ide/vs2019/mimalloc.vcxproj | 2 +- src/arena.c | 366 ++++++++++++++++++++++++++++++++++++ 2 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 src/arena.c diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index f38a7a11..6cfd76fa 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=1;%(PreprocessorDefinitions); + MI_DEBUG=2;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 diff --git a/src/arena.c b/src/arena.c new file mode 100644 index 00000000..b25732d7 --- /dev/null +++ b/src/arena.c @@ -0,0 +1,366 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_SIZE +#define MI_MAX_ARENAS (64) + +// Block info: bit 0 contains the `in_use` bit, the upper bits the +// size in count of arena blocks. +typedef uintptr_t mi_block_info_t; + +// A memory arena descriptor +typedef struct mi_arena_s { + uint8_t* start; // the start of the memory area + size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + bool is_zero_init; // is the arena zero initialized? + bool is_large; // large OS page allocated + _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks + _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's +} mi_arena_t; + + +// The available arenas +static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; +static _Atomic(uintptr_t) mi_arena_count; // = 0 + + +/* ----------------------------------------------------------- + Arena allocations get a memory id where the lower 8 bits are + the arena index +1, and the upper bits the block index. +----------------------------------------------------------- */ + +// Use `SIZE_MAX` as a special id for direct OS allocated memory. 
+#define MI_MEMID_OS (SIZE_MAX) + +static size_t mi_memid_create(size_t arena_index, size_t block_index) { + mi_assert_internal(arena_index < 0xFE); + return ((block_index << 8) | ((arena_index+1) & 0xFF)); +} + +static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { + mi_assert_internal(memid != MI_MEMID_OS); + mi_assert_internal(memid != 0); + *arena_index = (memid & 0xFF) - 1; + *block_index = (memid >> 8); +} + +/* ----------------------------------------------------------- + Block info +----------------------------------------------------------- */ + +static bool mi_block_is_in_use(mi_block_info_t info) { + return ((info&1) != 0); +} + +static size_t mi_block_count(mi_block_info_t info) { + return (info>>1); +} + +static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { + return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0)); +} + + +/* ----------------------------------------------------------- + Thread safe allocation in an arena +----------------------------------------------------------- */ + +static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + // Scan linearly through all block info's + // Skipping used ranges, coalescing free ranges on demand. + mi_assert_internal(needed_bcount > 0); + mi_assert_internal(start_idx <= arena->block_count); + mi_assert_internal(end_idx <= arena->block_count); + _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; + _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; + while (block < end) { + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + size_t bcount = mi_block_count(binfo); + if (mi_block_is_in_use(binfo)) { + // in-use, skip ahead + mi_assert_internal(bcount > 0); + block += bcount; + } + else { + // free blocks + if (bcount==0) { + // optimization: + // use 0 initialized blocks at the end, to use single atomic operation + // initially to reduce contention (as we don't need to split) + if (block + needed_bcount > end) { + return NULL; // does not fit + } + else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. + continue; + } + else { + // we got it: return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = arena->is_zero_init; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + + mi_assert_internal(bcount>0); + if (needed_bcount > bcount) { +#if 0 // MI_NO_ARENA_COALESCE + block += bcount; // too small, skip to the next range + continue; +#else + // too small, try to coalesce + _Atomic(mi_block_info_t)* block_next = block + bcount; + if (block_next >= end) { + return NULL; // does not fit + } + mi_block_info_t binfo_next = mi_atomic_read(block_next); + size_t bcount_next = mi_block_count(binfo_next); + if (mi_block_is_in_use(binfo_next)) { + // next block is in use, cannot coalesce + block += (bcount + bcount_next); // skip ahea over both blocks + } + else { + // next block is free, try to coalesce + // first set the next one to being used to prevent dangling ranges + if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { + // someone else got in before us.. 
try again + continue; + } + else { + if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance + // someone claimed/coalesced the block in the meantime + // first free the next block again.. + bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong + mi_assert(ok); UNUSED(ok); + // and try again + continue; + } + else { + // coalesced! try again + // todo: we could optimize here to immediately claim the block if the + // coalesced size is a fit instead of retrying. Keep it simple for now. + continue; + } + } + } +#endif + } + else { // needed_bcount <= bcount + mi_assert_internal(needed_bcount <= bcount); + // it fits, claim the whole block + if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. + continue; + } + else { + // got it, now split off the needed part + if (needed_bcount < bcount) { + mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); + mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); + } + // return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = false; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + } + } + // no success + return NULL; +} + +// Try to reduce search time by starting from bottom and wrap around. +static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); + void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); + if (p == NULL && bottom > 0) { + // try again from the start + p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); + } + if (p != NULL) { + mi_atomic_write(&arena->block_bottom, *block_index); + } + return p; +} + +/* ----------------------------------------------------------- + Arena Allocation +----------------------------------------------------------- */ + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = MI_MEMID_OS; + *is_zero = false; + bool default_large = false; + if (large==NULL) large = &default_large; // ensure `large != NULL` + + // try to allocate in an arena if the alignment is small enough + // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. + if (alignment <= MI_SEGMENT_ALIGN && + size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 48MiB (not more than 25% waste) + !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! 
<64MiB - 96MiB> + ) + { + size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); + size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + if (arena==NULL) break; + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); + #if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + #endif + *memid = mi_memid_create(i, block_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + mi_assert_internal((uintptr_t)p % alignment == 0); + return p; + } + } + } + + // fall back to the OS + *is_zero = true; + *memid = MI_MEMID_OS; + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); +} + +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); +} + +/* ----------------------------------------------------------- + Arena free +----------------------------------------------------------- */ + +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); + if (p==NULL) return; + if (size==0) return; + if (memid == MI_MEMID_OS) { + // was a direct OS allocation, pass through + _mi_os_free(p, size, stats); + } + else { + mi_assert_internal(memid != 0); + // allocated in an arena + size_t arena_idx; + size_t block_idx; + mi_memid_indices(memid, &arena_idx, &block_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); + mi_assert_internal(arena != NULL); + if (arena == NULL) { + _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + mi_assert_internal(arena->block_count > block_idx); + if (arena->block_count <= block_idx) { + _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + if (!mi_block_is_in_use(binfo)) { + _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); + return; + }; + bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); + mi_assert_internal(ok); + if (!ok) { + _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); + } + if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { + mi_atomic_write(&arena->block_bottom, block_idx); + } + } +} + +/* ----------------------------------------------------------- + Add an arena. 
+----------------------------------------------------------- */ + +static bool mi_arena_add(mi_arena_t* arena) { + mi_assert_internal(arena != NULL); + mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); + mi_assert_internal(arena->block_count > 0); + mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); + + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); + if (i >= MI_MAX_ARENAS) { + mi_atomic_subu(&mi_arena_count, 1); + return false; + } + mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena); + return true; +} + + +/* ----------------------------------------------------------- + Reserve a huge page arena. + TODO: improve OS api to just reserve and claim a huge + page area at once, (and return the total size). +----------------------------------------------------------- */ + +#include + +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + size_t pages_reserved_default = 0; + if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; + int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved); + if (*pages_reserved==0) return err; + size_t hsize = (*pages_reserved) * GiB; + void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN); + mi_assert_internal(p != NULL); + if (p == NULL) return ENOMEM; + size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; + size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats); + if (arena == NULL) return ENOMEM; + arena->block_count = bcount; + arena->start = (uint8_t*)p; + arena->block_bottom = 0; + arena->is_large = true; + arena->is_zero_init = true; + memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false)); + mi_arena_add(arena); + return 0; +} \ No newline at end of file From ed4f60fc7e5bfb17e0e7b4cc6bdd6e7102637d16 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 10:59:40 -0700 Subject: [PATCH 014/352] respect large pages for arena allocation --- src/arena.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/arena.c b/src/arena.c index b25732d7..63f08737 100644 --- a/src/arena.c +++ b/src/arena.c @@ -232,21 +232,23 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* for (size_t i = 0; i < MI_MAX_ARENAS; i++) { mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); if (arena==NULL) break; - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); - if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - #endif - *memid = mi_memid_create(i, block_index); - *commit = true; // TODO: support commit on demand? 
- *large = arena->is_large; - mi_assert_internal((uintptr_t)p % alignment == 0); - return p; + if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); + #if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + #endif + * memid = mi_memid_create(i, block_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + mi_assert_internal((uintptr_t)p % alignment == 0); + return p; + } } } } From 6695f8ae91c30615f009114b818ccfbccee8b122 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 10:59:50 -0700 Subject: [PATCH 015/352] add allow_decommit option --- include/mimalloc.h | 2 +- src/options.c | 6 +----- src/segment.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b63ed79d..a9c339e9 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -261,7 +261,6 @@ typedef enum mi_option_e { mi_option_verbose, // the following options are experimental mi_option_eager_commit, - mi_option_eager_region_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, @@ -269,6 +268,7 @@ typedef enum mi_option_e { mi_option_cache_reset, mi_option_reset_decommits, mi_option_eager_commit_delay, + mi_option_allow_decommit, mi_option_segment_reset, mi_option_os_tag, _mi_option_last diff --git a/src/options.c b/src/options.c index 99e90c46..31af819b 100644 --- a/src/options.c +++ b/src/options.c @@ -53,11 +53,6 @@ static mi_option_desc_t options[_mi_option_last] = // the following options are experimental and not all combinations make sense. { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled - #ifdef _WIN32 // and BSD? - { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) - #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, - #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread @@ -65,6 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; diff --git a/src/segment.c b/src/segment.c index 8a02acac..41cf08d9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -634,7 +634,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m segment->cookie = _mi_ptr_cookie(segment); segment->slice_entries = slice_entries; segment->kind = (required == 0 ? 
MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - segment->allow_decommit = !commit; + segment->allow_decommit = !commit && mi_option_is_enabled(mi_option_allow_decommit); segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); From bbca1cd8d96bde91cd14651cc114dff907d7ed73 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 12:42:23 -0700 Subject: [PATCH 016/352] allow decommit by default --- src/options.c | 2 +- src/segment.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/options.c b/src/options.c index 31af819b..d5771705 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; diff --git a/src/segment.c b/src/segment.c index 41cf08d9..e88d22c5 100644 --- a/src/segment.c +++ b/src/segment.c @@ -383,12 +383,11 @@ static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative uintptr_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 if (bitidx + bitcount > MI_INTPTR_SIZE*8) { - _mi_warning_message("%zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); + _mi_warning_message("commit mask overflow: %zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); } mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); uintptr_t mask = (((uintptr_t)1 << bitcount) - 1) << bitidx; - return mask; } From a74e072a9acb23e76fd0e1a6996bef1ade5ac027 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 19:00:26 -0700 Subject: [PATCH 017/352] set test-stress scale to 20 again --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index f60cda10..08406ec7 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,7 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 40; // scaling factor +static int N = 20; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From 268698b9efaaba6d03ad3d0840494ed63ad74d7f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Nov 2019 08:00:51 -0800 Subject: [PATCH 018/352] fix vs2019 project --- test/test-stress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 07f4a2aa..50cbf9bd 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -17,8 +17,8 @@ terms of the MIT license. 
#include // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 40; // scaling factor +static int THREADS = 8; // more repeatable if THREADS <= #processors +static int N = 200; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From b04206a9d32b18fa1654104548d205c9f2dfb87b Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Nov 2019 10:10:10 -0800 Subject: [PATCH 019/352] add os cache to arena --- include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 2 +- src/arena.c | 115 +++++++++++++++++++++++++++++++++--- src/segment.c | 2 +- test/test-stress.c | 8 +-- 5 files changed, 115 insertions(+), 14 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 10528877..afa265f5 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -68,7 +68,7 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) // arena.c void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats); // "segment.c" diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index cd9c5154..8203bc3b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -108,7 +108,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // 64kb on 64-bit -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/4) // 16mb on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32mb on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // Minimal alignment necessary. On most platforms 16 bytes are needed diff --git a/src/arena.c b/src/arena.c index 93655033..200e1ed7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -52,9 +52,9 @@ int _mi_os_numa_node_count(void); // size in count of arena blocks. 
typedef uintptr_t mi_block_info_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE/2) // 32MiB -#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB +#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_ALIGN // 64MiB +#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 4GiB +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor @@ -118,6 +118,98 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* } +/* ----------------------------------------------------------- + Arena cache +----------------------------------------------------------- */ +#define MI_CACHE_MAX (8) +#define MI_MAX_NUMA (64) + +#define MI_SLOT_IN_USE ((void*)1) + +typedef struct mi_cache_slot_s { + volatile _Atomic(void*) p; + volatile size_t memid; + volatile bool is_committed; + volatile bool is_large; +} mi_cache_slot_t; + +static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; + +static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + // only segment blocks + if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; + + // set numa range + int numa_min = numa_node; + int numa_max = numa_min; + if (numa_node < 0) { + numa_min = 0; + numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; + } + else { + if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + numa_min = numa_max = numa_node; + } + + // find a free slot + mi_cache_slot_t* slot; + for (int n = numa_min; n <= numa_max; n++) { + for (int i = 0; i < MI_CACHE_MAX; i++) { + slot = &cache[n][i]; + void* p = mi_atomic_read_ptr_relaxed(&slot->p); + if (p > MI_SLOT_IN_USE) { // not NULL or 1 + if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { + // claimed + if (!*large && slot->is_large) { + // back out again + mi_atomic_write_ptr(&slot->p, p); // make it available again + } + else { + // keep it + *memid = slot->memid; + *large = slot->is_large; + *is_zero = false; + bool committed = slot->is_committed; + mi_atomic_write_ptr(&slot->p, NULL); // set it free + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + } + *commit = committed; + return p; + } + } + } + } + } + return NULL; +} + +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large) { + // only for segment blocks + if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + + // try to add it to the cache + int numa_node = _mi_os_numa_node(NULL); + if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + mi_cache_slot_t* slot; + for (int i = 0; i < MI_CACHE_MAX; i++) { + slot = &cache[numa_node][i]; + void* p = mi_atomic_read_ptr_relaxed(&slot->p); + if (p == NULL) { // free slot + if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { + // claimed! 
+ slot->memid = memid; + slot->is_committed = is_committed; + slot->is_large = is_large; + mi_atomic_write_ptr(&slot->p, start); // and make it available; + return true; + } + } + } + return false; +} + /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ @@ -148,6 +240,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` + const int numa_node = _mi_os_numa_node(tld); // current numa node + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. if (alignment <= MI_SEGMENT_ALIGN && @@ -155,8 +249,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size >= MI_ARENA_MIN_OBJ_SIZE) { const size_t bcount = mi_block_count_of_size(size); - const int numa_node = _mi_os_numa_node(tld); // current numa node - + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { @@ -184,6 +277,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, } } + // try to get from the cache + void* p = mi_cache_pop(numa_node, size, alignment, commit, large, is_zero, memid, tld); + if (p != NULL) return p; + + // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; @@ -202,13 +300,16 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, siz Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; + if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - _mi_os_free(p, size, stats); + if (!mi_cache_push(p, size, memid, is_committed, is_large)) { + _mi_os_free(p, size, stats); + } } else { // allocated in an arena diff --git a/src/segment.c b/src/segment.c index 99e382bc..54a0c8fe 100644 --- a/src/segment.c +++ b/src/segment.c @@ -284,7 +284,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_committed || (~segment->commit_mask == 0), segment->mem_is_fixed, tld->stats); } diff --git a/test/test-stress.c b/test/test-stress.c index 50cbf9bd..3aa65f41 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------------- + /* ---------------------------------------------------------------------------- Copyright (c) 2018,2019 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. 
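The OS cache added to `arena.c` in this commit keeps a small per-NUMA-node table of recently freed
segment-sized blocks. Each slot's `p` field doubles as a tiny state machine: `NULL` means the slot is
empty, `MI_SLOT_IN_USE` (`(void*)1`) means another thread is busy filling or emptying it, and any other
value is an available block. A rough sketch of the pop-side claim, using only names introduced above
(the helper name is hypothetical; the real `mi_cache_pop` additionally handles NUMA node ranges and
commit-on-demand):

static void* mi_cache_slot_try_pop(mi_cache_slot_t* slot, bool allow_large) {
  void* p = mi_atomic_read_ptr_relaxed(&slot->p);
  if (p == NULL || p == MI_SLOT_IN_USE) return NULL;                      // empty, or currently being updated
  if (!mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) return NULL;  // lost the race to claim it
  if (!allow_large && slot->is_large) {
    mi_atomic_write_ptr(&slot->p, p);                                     // incompatible block: publish it again
    return NULL;
  }
  // the slot is "locked" here, so memid/is_committed/is_large can be copied out safely
  mi_atomic_write_ptr(&slot->p, NULL);                                    // mark the slot empty for future pushes
  return p;                                                               // the caller now owns the cached block
}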
@@ -64,9 +64,9 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1,r)) items *= 1000; // 0.01% giant - else if (chance(10,r)) items *= 100; // 0.1% huge - else items *= 10; // 1% large objects; + if (chance(1, r)) items *= 1000; // 0.01% giant + else if (chance(10, r)) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); From 321e18777e1dcf7214d84bb22ed537af7e4832ba Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 19:53:43 -0800 Subject: [PATCH 020/352] wip: delayed decommit on segments --- include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 14 +++--- src/arena.c | 13 ++--- src/options.c | 10 ++-- src/segment.c | 95 ++++++++++++++++++++++++++++++++----- 5 files changed, 103 insertions(+), 31 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index a1f7f870..7ce8d52b 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -66,7 +66,7 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) // arena.c void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld); // "segment.c" diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f7eafd39..d4c1e1c1 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -235,6 +235,9 @@ typedef enum mi_segment_kind_e { typedef mi_page_t mi_slice_t; +typedef int64_t mi_msecs_t; + + // Segments are large allocated memory blocks (2mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. @@ -243,6 +246,11 @@ typedef struct mi_segment_s { bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed + bool allow_decommit; + mi_msecs_t decommit_expire; + uintptr_t decommit_mask; + uintptr_t commit_mask; + // from here is zero initialized struct mi_segment_s* next; // the list of freed segments in the cache volatile _Atomic(struct mi_segment_s*) abandoned_next; @@ -254,9 +262,6 @@ typedef struct mi_segment_s { size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. 
- bool allow_decommit; - uintptr_t commit_mask; - // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment @@ -415,9 +420,6 @@ typedef struct mi_span_queue_s { #define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) -typedef int64_t mi_msecs_t; - - // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation diff --git a/src/arena.c b/src/arena.c index eecbc298..4602c42c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,7 +188,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co return NULL; } -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large) { +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -202,8 +202,9 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit if (p == NULL) { // free slot if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! - slot->memid = memid; + // _mi_os_decommit(start, size, tld->stats); slot->is_committed = is_committed; + slot->memid = memid; slot->is_large = is_large; mi_atomic_write_ptr(&slot->p, start); // and make it available; return true; @@ -317,15 +318,15 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, siz Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - if (!mi_cache_push(p, size, memid, is_committed, is_large)) { - _mi_os_free(p, size, stats); + if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) { + _mi_os_free(p, size, tld->stats); } } else { diff --git a/src/options.c b/src/options.c index 03ee3e0c..f1d94c80 100644 --- a/src/options.c +++ b/src/options.c @@ -56,7 +56,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. 
- { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled + { 0, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread @@ -65,10 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed - { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. - { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/segment.c b/src/segment.c index 473cd696..a644708a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,7 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_map_allocated_at(const mi_segment_t* segment); static void mi_segment_map_freed_at(const mi_segment_t* segment); - +static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); /* ----------------------------------------------------------- Segment allocation @@ -286,8 +286,12 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } + // purge delayed decommits now + mi_segment_delayed_decommit(segment,true,tld->stats); + // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_committed || (~segment->commit_mask == 0), segment->mem_is_fixed, tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, + (~segment->commit_mask == 0 && segment->decommit_mask == 0), segment->mem_is_fixed, tld->os); } @@ -331,7 +335,8 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) if (segment->segment_slices != MI_SLICES_PER_SEGMENT || mi_segment_cache_full(tld)) { return false; } - + // mi_segment_delayed_decommit(segment, true, tld->stats); + // segment->decommit_mask = 0; mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT); mi_assert_internal(segment->next == NULL); segment->next = tld->cache; @@ -395,29 +400,79 @@ static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s // commit liberal, but decommit conservative uint8_t* start; size_t full_size; - uintptr_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); + uintptr_t mask = 
mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); if (mask==0 || full_size==0) return; if (commit && (segment->commit_mask & mask) != mask) { bool is_zero = false; _mi_os_commit(start,full_size,&is_zero,stats); - segment->commit_mask |= mask; + segment->commit_mask |= mask; } else if (!commit && (segment->commit_mask & mask) != 0) { + mi_assert_internal((void*)start != (void*)segment); _mi_os_decommit(start, full_size,stats); segment->commit_mask &= ~mask; } + // increase expiration of reusing part of the delayed decommit + if (commit && (segment->decommit_mask & mask) != 0) { + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + } + // always undo delayed decommits + segment->decommit_mask &= ~mask; } static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (~segment->commit_mask == 0) return; // fully committed + if (~segment->commit_mask == 0 && segment->decommit_mask==0) return; // fully committed mi_segment_commitx(segment,true,p,size,stats); } static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_decommit) return; // TODO: check option_decommit? if (segment->commit_mask == 1) return; // fully decommitted - mi_segment_commitx(segment, false, p, size, stats); + if (mi_option_get(mi_option_reset_delay) == 0) { + mi_segment_commitx(segment, false, p, size, stats); + } + else { + // create mask + uint8_t* start; + size_t full_size; + uintptr_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); + if (mask==0 || full_size==0) return; + + // update delayed commit + segment->decommit_mask |= mask; + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + } +} + +static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { + if (segment->decommit_mask == 0) return; + mi_msecs_t now = _mi_clock_now(); + if (!force && now < segment->decommit_expire) return; + + uintptr_t mask = segment->decommit_mask; + segment->decommit_expire = 0; + segment->decommit_mask = 0; + + uintptr_t idx = 0; + while (mask != 0) { + // count ones + size_t count = 0; + while ((mask&1)==1) { + mask >>= 1; + count++; + } + // if found, decommit that sequence + if (count > 0) { + uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); + size_t size = count * MI_COMMIT_SIZE; + mi_segment_commitx(segment, false, p, size, stats); + idx += count; + } + // shift out the 0 + mask >>= 1; + idx++; + } } static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { @@ -440,7 +495,7 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size // perhaps decommit mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); - + // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); else slice->block_size = 0; // mark huge page as free anyways @@ -599,6 +654,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m // Try to get from our cache first mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); bool is_zero = false; + bool commit_info_still_good = (segment != NULL); if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we 
are no longer lazy @@ -614,7 +670,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = mi_option_is_enabled(mi_option_eager_commit); // commit; mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } @@ -625,7 +681,14 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m size_t prefix = offsetof(mi_segment_t, slices) - ofs; memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); } - + + if (!commit_info_still_good) { + segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed + segment->allow_decommit = mi_option_is_enabled(mi_option_allow_decommit); + segment->decommit_expire = 0; + segment->decommit_mask = 0; + } + // initialize segment info segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; @@ -633,8 +696,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m segment->cookie = _mi_ptr_cookie(segment); segment->slice_entries = slice_entries; segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - segment->allow_decommit = !commit && mi_option_is_enabled(mi_option_allow_decommit); - segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed + // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); @@ -723,6 +785,7 @@ static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t requir } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); + mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); return page; } @@ -799,7 +862,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - + // remove the free pages from our lists mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -813,6 +876,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } + // force delayed decommits + mi_segment_delayed_decommit(segment, true, tld->stats); + //segment->decommit_mask = 0; + // add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)mi_segment_size(segment)), tld); @@ -866,6 +933,8 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segments_track_size((long)mi_segment_size(segment),tld); mi_assert_internal(segment->next == NULL); _mi_stat_decrease(&tld->stats->segments_abandoned,1); + mi_assert_internal(segment->decommit_mask == 0); + mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); From 7da00c1220f77120f10e975dba881c93cb21626a Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 20:57:32 -0800 Subject: [PATCH 021/352] wip: full decommit delay, for arena cache as well --- include/mimalloc.h | 1 + src/arena.c | 55 +++++++++++++++++++++++++++++++++++++++++----- src/options.c | 5 +++-- src/segment.c | 12 +++++----- 
4 files changed, 58 insertions(+), 15 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 56d54d94..2c69514b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,6 +275,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_allow_decommit, mi_option_reset_delay, + mi_option_arena_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/arena.c b/src/arena.c index 4602c42c..c1b47073 100644 --- a/src/arena.c +++ b/src/arena.c @@ -124,16 +124,17 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (64) +#define MI_CACHE_MAX (64) // ~4GiB #define MI_MAX_NUMA (16) #define MI_SLOT_IN_USE ((void*)1) typedef struct mi_cache_slot_s { volatile _Atomic(void*) p; - volatile size_t memid; - volatile bool is_committed; - volatile bool is_large; + volatile size_t memid; + volatile mi_msecs_t expire; + volatile bool is_committed; + volatile bool is_large; } mi_cache_slot_t; static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; @@ -188,7 +189,43 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co return NULL; } -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { +static void mi_cache_purge(mi_os_tld_t* tld) { + // TODO: for each numa node instead? + // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; + + mi_msecs_t now = _mi_clock_now(); + int numa_node = _mi_os_numa_node(NULL); + if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + mi_cache_slot_t* slot; + int purged = 0; + for (int i = 0; i < MI_CACHE_MAX; i++) { + slot = &cache[numa_node][i]; + void* p = mi_atomic_read_ptr_relaxed(&slot->p); + if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { + mi_msecs_t expire = slot->expire; + if (now >= expire) { + // expired, try to claim it + if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { + // claimed! test again + if (!slot->is_committed && !slot->is_large && now >= slot->expire) { + _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; + } + // and unclaim again + mi_atomic_write_ptr(&slot->p, p); + purged++; + if (purged >= 4) break; // limit to at most 4 decommits per push + } + } + } + } +} + + +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) +{ + mi_cache_purge(tld); + // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -202,7 +239,12 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit if (p == NULL) { // free slot if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! - // _mi_os_decommit(start, size, tld->stats); + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0 && !is_large) { + _mi_os_decommit(start, size, tld->stats); + is_committed = false; + } + slot->expire = (is_committed ? 
0 : _mi_clock_now() + delay); slot->is_committed = is_committed; slot->memid = memid; slot->is_large = is_large; @@ -214,6 +256,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit return false; } + /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ diff --git a/src/options.c b/src/options.c index f1d94c80..dcee89e6 100644 --- a/src/options.c +++ b/src/options.c @@ -64,8 +64,9 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed - { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index a644708a..247ce28d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -286,8 +286,8 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } - // purge delayed decommits now - mi_segment_delayed_decommit(segment,true,tld->stats); + // purge delayed decommits now? (no, leave it to the cache) + // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); _mi_arena_free(segment, mi_segment_size(segment), segment->memid, @@ -335,8 +335,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) if (segment->segment_slices != MI_SLICES_PER_SEGMENT || mi_segment_cache_full(tld)) { return false; } - // mi_segment_delayed_decommit(segment, true, tld->stats); - // segment->decommit_mask = 0; + // mi_segment_delayed_decommit(segment, true, tld->stats); mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT); mi_assert_internal(segment->next == NULL); segment->next = tld->cache; @@ -876,9 +875,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // force delayed decommits - mi_segment_delayed_decommit(segment, true, tld->stats); - //segment->decommit_mask = 0; + // force delayed decommits instead? 
+ mi_segment_delayed_decommit(segment, false, tld->stats); // add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); From ec0005b91978171fa8124e3567da7fda070cb6a8 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 19:06:30 -0800 Subject: [PATCH 022/352] more fine grained commit tracking per MiB --- include/mimalloc-types.h | 2 +- src/arena.c | 2 +- src/options.c | 2 +- src/segment.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d4c1e1c1..3cdc4963 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -227,7 +227,7 @@ typedef enum mi_segment_kind_e { MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. } mi_segment_kind_t; -#define MI_COMMIT_SIZE (2UL<<20) // OS large page size +#define MI_COMMIT_SIZE (1UL<<20) // OS large page size #if ((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE > 8*MI_INTPTR_SIZE) #error "not enough commit bits to cover the segment size" diff --git a/src/arena.c b/src/arena.c index c1b47073..f3dd690f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -203,7 +203,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { void* p = mi_atomic_read_ptr_relaxed(&slot->p); if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { mi_msecs_t expire = slot->expire; - if (now >= expire) { + if (expire != 0 && now >= expire) { // expired, try to claim it if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { // claimed! test again diff --git a/src/options.c b/src/options.c index 6fd887a3..dcee89e6 100644 --- a/src/options.c +++ b/src/options.c @@ -64,7 +64,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/segment.c b/src/segment.c index 042ce2b7..d0580b74 100644 --- a/src/segment.c +++ b/src/segment.c @@ -381,6 +381,7 @@ static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); *start_p = (uint8_t*)segment + start; *full_size = (end > start ? 
end - start : 0); + if (*full_size == 0) return 0; uintptr_t bitidx = start / MI_COMMIT_SIZE; mi_assert_internal(bitidx < (MI_INTPTR_SIZE*8)); @@ -931,8 +932,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segments_track_size((long)mi_segment_size(segment),tld); mi_assert_internal(segment->next == NULL); _mi_stat_decrease(&tld->stats->segments_abandoned,1); - mi_assert_internal(segment->decommit_mask == 0); - + //mi_assert_internal(segment->decommit_mask == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); From 41af533a344f78858dc934e9e2994836e81adab3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 19:17:56 -0800 Subject: [PATCH 023/352] define commit unit in terms of segment size --- include/mimalloc-types.h | 13 +++++++------ src/options.c | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 3cdc4963..9e183ca5 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -76,6 +76,7 @@ terms of the MIT license. A copy of the license can be found in the file #endif #define MI_INTPTR_SIZE (1< MI_LARGE_SIZE_MAX segment with just one huge page inside. } mi_segment_kind_t; -#define MI_COMMIT_SIZE (1UL<<20) // OS large page size +#define MI_COMMIT_SIZE (MI_SEGMENT_SIZE/MI_INTPTR_BITS) -#if ((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE > 8*MI_INTPTR_SIZE) +#if (((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE) > 8*MI_INTPTR_SIZE) #error "not enough commit bits to cover the segment size" #endif diff --git a/src/options.c b/src/options.c index dcee89e6..6fd887a3 100644 --- a/src/options.c +++ b/src/options.c @@ -64,7 +64,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. From 2808c9f4c871a32752d8e6e32fc3841cd1a0fd2e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 18:01:52 -0800 Subject: [PATCH 024/352] default to non-eager commit --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index d3d9f9be..328fb86e 100644 --- a/src/options.c +++ b/src/options.c @@ -56,7 +56,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand + { 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory From 94bff89347715de069cce2345d1a57f6045a131b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 20:48:18 -0800 Subject: [PATCH 025/352] ensure page reset flag is always reset --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index d94bc894..734ca1c7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -602,6 +602,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i // ensure the memory is committed mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + page->is_reset = false; segment->used++; return page; } From 88b141cf1fb1ff9ba3fd033ac9f5e6b7eae4d919 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 20:48:37 -0800 Subject: [PATCH 026/352] ensure proper padding for the page structure --- include/mimalloc-types.h | 6 +++--- src/init.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b0b4a44f..ec382b5e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -211,9 +211,9 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words - #if (MI_INTPTR_SIZE==4) - void* padding[1]; // 12/14 words on 32-bit plain + // without padding: 11 words on 64-bit, 14 on 32-bit. Secure adds two words + #if (MI_INTPTR_SIZE==8) + void* padding[1]; // 12/14 words on 64-bit #endif } mi_page_t; diff --git a/src/init.c b/src/init.c index 1409faaa..51c18d93 100644 --- a/src/init.c +++ b/src/init.c @@ -26,7 +26,7 @@ const mi_page_t _mi_page_empty = { NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) + #if (MI_INTPTR_SIZE==8) , { NULL } // padding #endif }; From 0028272cf4b39aa27e44b6fd0973744604beb6d7 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 22:33:29 -0800 Subject: [PATCH 027/352] small fixes, reduced segment size, fix merge conflicts --- include/mimalloc-types.h | 2 +- src/alloc.c | 1 + src/arena.c | 30 ++++++++++++++++++++---------- src/options.c | 2 +- src/page.c | 19 ++++++++++++++++--- src/segment.c | 28 ++++++---------------------- src/static.c | 4 ---- 7 files changed, 45 insertions(+), 41 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index a5fd12b8..6685b5a7 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -89,7 +89,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb +#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb diff --git a/src/alloc.c b/src/alloc.c index de8bd3d2..6370b19d 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -195,6 +195,7 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag mi_tld_t* tld = heap->tld; const size_t bsize = mi_page_block_size(page); if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_assert_internal(false); _mi_stat_decrease(&tld->stats.large, bsize); } else { diff --git a/src/arena.c b/src/arena.c index 104a7e83..4fb1364a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,3 +1,4 @@ + /* ---------------------------------------------------------------------------- Copyright (c) 2019, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the @@ -36,7 +37,8 @@ of 256MiB in practice. // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); +// void _mi_os_free(void* p, size_t size, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); @@ -178,8 +180,11 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; } - *commit = committed; + else { + *commit = committed; + } return p; } } @@ -207,7 +212,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { // expired, try to claim it if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { // claimed! test again - if (!slot->is_committed && !slot->is_large && now >= slot->expire) { + if (slot->is_committed && !slot->is_large && now >= slot->expire) { _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); slot->is_committed = false; } @@ -239,15 +244,20 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit if (p == NULL) { // free slot if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0 && !is_large) { - _mi_os_decommit(start, size, tld->stats); - is_committed = false; - } - slot->expire = (is_committed ? 
0 : _mi_clock_now() + delay); + slot->expire = 0; slot->is_committed = is_committed; slot->memid = memid; slot->is_large = is_large; + if (is_committed) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0 && !is_large) { + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; + } + } mi_atomic_write_ptr(&slot->p, start); // and make it available; return true; } @@ -369,7 +379,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) { - _mi_os_free(p, size, tld->stats); + _mi_os_free_ex(p, size, is_committed, tld->stats); } } else { diff --git a/src/options.c b/src/options.c index 489f07b3..1130e2e3 100644 --- a/src/options.c +++ b/src/options.c @@ -71,7 +71,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/page.c b/src/page.c index 44f32a73..13706100 100644 --- a/src/page.c +++ b/src/page.c @@ -378,9 +378,22 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { // no more aligned blocks in here mi_page_set_has_aligned(page, false); + mi_heap_t* heap = mi_page_heap(page); + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.large, bsize); + } + else { + // not strictly necessary as we never get here for a huge page + mi_assert_internal(false); + _mi_stat_decrease(&heap->tld->stats.huge, bsize); + } + } + // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) - mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments; + mi_segments_tld_t* segments_tld = &heap->tld->segments; mi_page_queue_remove(pq, page); // and free it @@ -769,11 +782,11 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); } if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.large, block_size); + _mi_stat_increase(&heap->tld->stats.large, bsize); _mi_stat_counter_increase(&heap->tld->stats.large_count, 1); } else { - _mi_stat_increase(&heap->tld->stats.huge, block_size); + _mi_stat_increase(&heap->tld->stats.huge, bsize); _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); } } diff --git a/src/segment.c b/src/segment.c index 5ce4d7ba..b3a33d60 100644 --- a/src/segment.c +++ b/src/segment.c @@ -7,6 +7,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include "bitmap.inc.c" // mi_bsr #include // memset #include @@ -49,23 +50,7 @@ static uint8_t* mi_slice_start(const mi_slice_t* slice) { Bins ----------------------------------------------------------- */ // Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#include -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanReverse64(&idx, x); - #else - _BitScanReverse(&idx, x); - #endif - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); -} -#else +#if !defined(MI_HAVE_BITSCAN) #error "define bsr for your platform" #endif @@ -410,7 +395,7 @@ static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } else if (!commit && (segment->commit_mask & mask) != 0) { mi_assert_internal((void*)start != (void*)segment); - _mi_os_decommit(start, full_size,stats); + _mi_os_decommit(start, full_size, stats); segment->commit_mask &= ~mask; } // increase expiration of reusing part of the delayed decommit @@ -902,8 +887,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // force delayed decommits instead? - mi_segment_delayed_decommit(segment, false, tld->stats); + // perform delayed decommits instead + mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1018,7 +1003,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (segment->used == 0) { // due to page_clear mi_segment_free(segment,false,tld); } - + // go on segment = next; } @@ -1185,6 +1170,5 @@ static void* mi_segment_range_of(const void* p, size_t* size) { mi_reset_delayed(tld); mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; ->>>>>>> dev } */ diff --git a/src/static.c b/src/static.c index bcfaa119..b3c71e02 100644 --- a/src/static.c +++ b/src/static.c @@ -16,12 +16,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "stats.c" #include "random.c" #include "os.c" -<<<<<<< HEAD //#include "memory.c" -======= #include "arena.c" -#include "memory.c" ->>>>>>> dev #include "segment.c" #include "page.c" #include "heap.c" From caa5e51a67dd7c1a6efe0393a4f78986d2c9e547 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 22 Jan 2020 11:29:32 -0800 Subject: [PATCH 028/352] align size of page_t, increase slices per segment --- include/mimalloc-types.h | 32 ++++++++++++++++++-------------- src/init.c | 3 +++ src/page.c | 4 ++-- src/segment.c | 1 + 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 6685b5a7..661e2856 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -89,7 +89,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 64mb +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb @@ -104,7 +104,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SMALL_PAGE_SIZE (1ULL<0`) - mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads volatile _Atomic(uintptr_t) xheap; - - struct mi_page_s* next; // next page owned by this thread with the same `block_size` - struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + struct mi_page_s* next; // next page owned by this thread with the same `block_size` + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + + // 64-bit 9 words, 32-bit 12 words, (+2 for secure) + #if MI_INTPTR_SIZE==8 + uintptr_t padding[1]; + #endif } mi_page_t; diff --git a/src/init.c b/src/init.c index a0873615..e77185ff 100644 --- a/src/init.c +++ b/src/init.c @@ -28,6 +28,9 @@ const mi_page_t _mi_page_empty = { ATOMIC_VAR_INIT(0), // xthread_free ATOMIC_VAR_INIT(0), // xheap NULL, NULL + #if MI_INTPTR_SIZE==8 + , { 0 } // padding + #endif }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) diff --git a/src/page.c b/src/page.c index 13706100..5b2a85f7 100644 --- a/src/page.c +++ b/src/page.c @@ -74,10 +74,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); - const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); + //const size_t bsize = mi_page_block_size(page); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -86,7 +86,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { #if MI_DEBUG>3 // generally too expensive to check this if (page->flags.is_zero) { for(mi_block_t* block = page->free; block != NULL; mi_block_next(page,block)) { - mi_assert_expensive(mi_mem_is_zero(block + 1, page->block_size - sizeof(mi_block_t))); + mi_assert_expensive(mi_mem_is_zero(block + 1, bsize - sizeof(mi_block_t))); } } #endif diff --git a/src/segment.c b/src/segment.c index b3a33d60..22757968 100644 --- a/src/segment.c +++ b/src/segment.c @@ -458,6 +458,7 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st mask >>= 1; idx++; } + mi_assert_internal(segment->decommit_mask == 0); } static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { From a46d20a681cbae2d5e353974db9331b7d84c8eed Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 20:53:44 -0800 Subject: [PATCH 029/352] merge with new atomic macros --- include/mimalloc-types.h | 2 +- src/arena.c | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 
deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 661e2856..b65bf266 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -89,7 +89,7 @@ terms of the MIT license. A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 64mb +#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 16mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb diff --git a/src/arena.c b/src/arena.c index fe943e07..167cf751 100644 --- a/src/arena.c +++ b/src/arena.c @@ -126,7 +126,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (64) // ~4GiB +#define MI_CACHE_MAX (128) // ~4GiB #define MI_MAX_NUMA (16) #define MI_SLOT_IN_USE ((void*)1) @@ -162,13 +162,13 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co for (int n = numa_min; n <= numa_max; n++) { for (int i = 0; i < MI_CACHE_MAX; i++) { slot = &cache[n][i]; - void* p = mi_atomic_read_ptr_relaxed(&slot->p); + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p); if (p > MI_SLOT_IN_USE) { // not NULL or 1 - if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { // claimed if (!*large && slot->is_large) { // back out again - mi_atomic_write_ptr(&slot->p, p); // make it available again + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again } else { // keep it @@ -176,7 +176,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co *large = slot->is_large; *is_zero = false; bool committed = slot->is_committed; - mi_atomic_write_ptr(&slot->p, NULL); // set it free + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); @@ -205,19 +205,19 @@ static void mi_cache_purge(mi_os_tld_t* tld) { int purged = 0; for (int i = 0; i < MI_CACHE_MAX; i++) { slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(&slot->p); + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { mi_msecs_t expire = slot->expire; if (expire != 0 && now >= expire) { // expired, try to claim it - if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { // claimed! 
test again if (slot->is_committed && !slot->is_large && now >= slot->expire) { _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); slot->is_committed = false; } // and unclaim again - mi_atomic_write_ptr(&slot->p, p); + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); purged++; if (purged >= 4) break; // limit to at most 4 decommits per push } @@ -240,9 +240,9 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit mi_cache_slot_t* slot; for (int i = 0; i < MI_CACHE_MAX; i++) { slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(&slot->p); + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); if (p == NULL) { // free slot - if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! slot->expire = 0; slot->is_committed = is_committed; @@ -258,7 +258,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit slot->expire = _mi_clock_now() + delay; } } - mi_atomic_write_ptr(&slot->p, start); // and make it available; + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; return true; } } From 09b98e0f7fee183df0e627ddcedcd9b870b156cf Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 27 Jan 2020 22:14:10 -0800 Subject: [PATCH 030/352] merge from dev-exp; resolve conflicts --- CMakeLists.txt | 4 ---- src/static.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f332156d..ab3946ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,10 +21,6 @@ set(mi_sources src/random.c src/os.c src/arena.c -<<<<<<< HEAD -======= - src/region.c ->>>>>>> dev-exp src/segment.c src/page.c src/alloc.c diff --git a/src/static.c b/src/static.c index 302fa72e..b3c71e02 100644 --- a/src/static.c +++ b/src/static.c @@ -18,10 +18,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "os.c" //#include "memory.c" #include "arena.c" -<<<<<<< HEAD -======= -#include "region.c" ->>>>>>> dev-exp #include "segment.c" #include "page.c" #include "heap.c" From 54e206a0a1f6aac0071a35ff5279d6c035a68b35 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 27 Jan 2020 22:41:24 -0800 Subject: [PATCH 031/352] increase retire page size --- src/page.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index cb193579..dcd39ed7 100644 --- a/src/page.c +++ b/src/page.c @@ -393,6 +393,9 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } + +#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX) + // Retire a page with no more used blocks // Important to not retire too quickly though as new // allocations might coming. @@ -413,7 +416,7 @@ void _mi_page_retire(mi_page_t* page) { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { + if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 16; @@ -427,7 +430,7 @@ void _mi_page_retire(mi_page_t* page) { // free retired pages: we don't need to look at the entire queues // since we only retire pages that are the last one in a queue. 
void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { - for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { + for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_MAX_RETIRE_SIZE; pq++) { mi_page_t* page = pq->first; if (page != NULL && page->retire_expire != 0) { if (mi_page_all_free(page)) { @@ -684,7 +687,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { - _mi_heap_collect_retired(heap, false); // perhaps make a page available + _mi_heap_collect_retired(heap, false); // perhaps make a page available? page = mi_page_fresh(heap, pq); if (page == NULL && first_try) { // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again From d221a4b9049344758e31850cf0f2716b5e6ff7e3 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 27 Jan 2020 23:36:53 -0800 Subject: [PATCH 032/352] merge from dev-exp --- src/page.c | 3 +-- src/region.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index dcd39ed7..42dfdfbf 100644 --- a/src/page.c +++ b/src/page.c @@ -393,8 +393,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } - -#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX) +#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX) // Retire a page with no more used blocks // Important to not retire too quickly though as new diff --git a/src/region.c b/src/region.c index 92758229..fd7d4544 100644 --- a/src/region.c +++ b/src/region.c @@ -284,7 +284,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(!(info.x.is_large && !*is_large)); mi_assert_internal(start != NULL); - *is_zero = mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); + *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); *is_large = info.x.is_large; *memid = mi_memid_create(region, bit_idx); void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); From f86519bca6ca2e3730cc2e0e27499729f1af816d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 28 Apr 2020 16:46:00 -0700 Subject: [PATCH 033/352] make lazy commit default; add commit check on segment allocation --- src/options.c | 4 ++-- src/segment.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/options.c b/src/options.c index c9c92003..89048c7d 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand? + { 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand? #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory @@ -79,7 +79,7 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #endif - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/segment.c b/src/segment.c index b8e5f2ec..ed587d5b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -671,7 +671,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!commit) { // at least commit the info slices mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); - _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); + bool ok = _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); + if (!ok) return NULL; // failed to commit } segment->memid = memid; segment->mem_is_fixed = mem_large; From 0d25493c39a616b13fe59b83413d4bda3fff0afe Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 28 Apr 2020 16:50:03 -0700 Subject: [PATCH 034/352] segment size to 16MiB to improve perf on mstress and rptest --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index cf94809b..a2c3fa9a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -100,7 +100,7 @@ terms of the MIT license. A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 8mb +#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 8mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb From c609248f0ee2f7daff898d8da516493aea1b1a34 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 30 Apr 2020 13:30:19 -0700 Subject: [PATCH 035/352] do delayed decommit if not reclaiming abandoned blocks --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index ed587d5b..ba7cf687 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1208,6 +1208,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again + mi_segment_delayed_decommit(segment, false, tld->stats); // decommit if needed mi_abandoned_visited_push(segment); } } From dad3be3c645a5e8844df50c38fc9c50d0cc88d6a Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 30 Apr 2020 17:21:36 -0700 Subject: [PATCH 036/352] update comments --- include/mimalloc-types.h | 2 +- src/arena.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index a2c3fa9a..12a420c2 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -100,7 +100,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 8mb +#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 16mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb diff --git a/src/arena.c b/src/arena.c index 99eb766c..77616580 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,7 +127,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (128) // ~4GiB +#define MI_CACHE_MAX (128) #define MI_MAX_NUMA (16) #define MI_SLOT_IN_USE ((void*)1) From dcb3574cf05c66ca141d86b3ad33089495f9fbca Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 May 2020 21:14:41 -0700 Subject: [PATCH 037/352] fix assertions for huge segment free --- include/mimalloc-internal.h | 1 + src/alloc.c | 31 +------------------------------ src/segment.c | 13 ++++++++----- 3 files changed, 10 insertions(+), 35 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e264751f..01be32c8 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -82,6 +82,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); diff --git a/src/alloc.c b/src/alloc.c index 2ee8b720..b948071b 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -286,35 +286,6 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co // Free // ------------------------------------------------------ -// free huge block from another thread -static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { - // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(segment->kind==MI_SEGMENT_HUGE); - mi_assert_internal(segment == _mi_page_segment(page)); - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); - - // claim it and free - mi_heap_t* heap = mi_get_default_heap(); - // paranoia: if this it the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; - page->is_zero = false; - mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - const size_t bsize = mi_page_block_size(page); - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_assert_internal(false); - _mi_stat_decrease(&tld->stats.large, bsize); - } - else { - _mi_stat_decrease(&tld->stats.huge, bsize); - } - _mi_segment_page_free(page, true, &tld->segments); - } -} - // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { @@ -329,7 +300,7 @@ 
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // huge page segments are always abandoned and can be freed immediately mi_segment_t* segment = _mi_page_segment(page); if (segment->kind==MI_SEGMENT_HUGE) { - mi_free_huge_block_mt(segment, page, block); + _mi_segment_huge_page_free(segment, page, block); return; } diff --git a/src/segment.c b/src/segment.c index ba7cf687..cd239931 100644 --- a/src/segment.c +++ b/src/segment.c @@ -166,8 +166,8 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_slice_t* last = &segment->slices[maxindex]; mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); mi_assert_internal(slice == last || last->slice_count == 0 ); - mi_assert_internal(last->xblock_size == 0); - if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { // segment is not huge or abandonded + mi_assert_internal(last->xblock_size == 0 || (segment->kind==MI_SEGMENT_HUGE && last->xblock_size==1)); + if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandonded sq = mi_span_queue_for(slice->slice_count,tld); mi_assert_internal(mi_span_queue_contains(sq,slice)); } @@ -525,8 +525,10 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { - mi_assert_internal(segment->used == 0); + mi_assert_internal(segment->used == 1); // decreased right after this call in `mi_segment_page_clear` slice->xblock_size = 0; // mark as free anyways + // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to + // avoid a possible cache miss (and the segment is about to be freed) return slice; } @@ -1022,8 +1024,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // perform delayed decommits instead - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld->stats); + // perform delayed decommits + mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset) /* force? 
*/, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1297,6 +1299,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld // free huge block from another thread void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { // huge page segments are always abandoned and can be freed immediately by any thread + mi_assert_internal(segment->kind==MI_SEGMENT_HUGE); mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); From 8bfd5ec865a2f6f1a7d237092daa43c93aec5e2c Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 May 2020 23:00:17 -0700 Subject: [PATCH 038/352] improve arena cache to avoid full scans --- src/arena.c | 54 ++++++++++++++++++++++++++++++++++++++++++++------- src/options.c | 2 +- src/segment.c | 4 ++-- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/arena.c b/src/arena.c index 77616580..3f90a07d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,8 +127,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (128) -#define MI_MAX_NUMA (16) +#define MI_CACHE_MAX (256) +#define MI_MAX_NUMA (8) #define MI_SLOT_IN_USE ((void*)1) @@ -140,7 +140,20 @@ typedef struct mi_cache_slot_s { volatile bool is_large; } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; +static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 +static volatile _Atomic(uintptr_t) cache_count[MI_MAX_NUMA]; // = 0 + +typedef union mi_cache_count_u { + uintptr_t value; + struct { + int16_t count; // at most `count` elements in the cache +#if MI_INTPTR_SIZE > 4 + uint32_t epoch; // each push/pop increase this +#else + uint16_t epoch; +#endif + } x; +} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks @@ -161,10 +174,23 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co // find a free slot mi_cache_slot_t* slot; for (int n = numa_min; n <= numa_max; n++) { - for (int i = 0; i < MI_CACHE_MAX; i++) { + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[n]); + int16_t count = top.x.count; + for (int16_t i = count - 1; i >= 0; i--) { slot = &cache[n][i]; void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p); - if (p > MI_SLOT_IN_USE) { // not NULL or 1 + if (p == NULL) { + if (count > 0) { count = i; } + } + else if (p > MI_SLOT_IN_USE) { // not NULL or 1 + if (count >= 0 && count < top.x.count) { // new lower bound? 
+ mi_cache_count_t newtop = { 0 }; + newtop.x.count = count; + newtop.x.epoch = top.x.epoch + 1; + mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans + } + count = -1; // don't try to set lower bound again if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { // claimed if (!*large && slot->is_large) { @@ -204,7 +230,9 @@ static void mi_cache_purge(mi_os_tld_t* tld) { if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; mi_cache_slot_t* slot; int purged = 0; - for (int i = 0; i < MI_CACHE_MAX; i++) { + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + for (int i = 0; i < top.x.count; i++) { slot = &cache[numa_node][i]; void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { @@ -240,12 +268,24 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit int numa_node = _mi_os_numa_node(NULL); if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; mi_cache_slot_t* slot; - for (int i = 0; i < MI_CACHE_MAX; i++) { + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { slot = &cache[numa_node][i]; void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); if (p == NULL) { // free slot if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! + // first try to increase the top bound + mi_cache_count_t newtop = { 0 }; + newtop.x.count = i+1; + newtop.x.epoch = top.x.epoch + 1; + while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + if (top.x.count > newtop.x.count) break; // another push max'd it + newtop.x.epoch = top.x.epoch + 1; // otherwise try again + } + // set the slot slot->expire = 0; slot->is_committed = is_committed; slot->memid = memid; diff --git a/src/options.c b/src/options.c index 89048c7d..767a7c35 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand? + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand? #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory diff --git a/src/segment.c b/src/segment.c index cd239931..5cf1598d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -960,6 +960,7 @@ static void mi_abandoned_push(mi_segment_t* segment) { } // Wait until there are no more pending reads on segments that used to be in the abandoned list +// called for example from `arena.c` before decommitting void _mi_abandoned_await_readers(void) { uintptr_t n; do { @@ -982,8 +983,7 @@ static mi_segment_t* mi_abandoned_pop(void) { // Do a pop. We use a reader count to prevent // a segment to be decommitted while a read is still pending, - // and a tagged pointer to prevent A-B-A link corruption. - // (this is called from `memory.c:_mi_mem_free` for example) + // and a tagged pointer to prevent A-B-A link corruption. 
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; do { From 79da2728c42bbc59922b4df7decdaa9f7fb99501 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 00:12:45 -0700 Subject: [PATCH 039/352] reduce cache --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 3f90a07d..55f09a60 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,7 +128,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena cache ----------------------------------------------------------- */ #define MI_CACHE_MAX (256) -#define MI_MAX_NUMA (8) +#define MI_MAX_NUMA (4) #define MI_SLOT_IN_USE ((void*)1) From 01ad5539780b4696b734eec964dbc59bacb2c45d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 00:13:03 -0700 Subject: [PATCH 040/352] set default reset delay to 250ms --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 767a7c35..382dd65e 100644 --- a/src/options.c +++ b/src/options.c @@ -80,7 +80,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed - { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose From dd188529464a8c42c12a686a8d085b2137b049fa Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 00:13:40 -0700 Subject: [PATCH 041/352] reduce page retire cycles --- src/page.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index 3bc146bc..ebd88253 100644 --- a/src/page.c +++ b/src/page.c @@ -394,7 +394,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { } #define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) -#define MI_RETIRE_CYCLES (16) +#define MI_RETIRE_CYCLES (8) // Retire a page with no more used blocks // Important to not retire too quickly though as new @@ -419,7 +419,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = MI_RETIRE_CYCLES; + page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? 
MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; From 1b158d8e805734981a1a8e7ea20aa95f6b5c77e4 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 10:37:07 -0700 Subject: [PATCH 042/352] set max retire size to MAX_MEDIUM_OBJ_SIZE --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 022e2fd1..7c7b5dc1 100644 --- a/src/page.c +++ b/src/page.c @@ -393,7 +393,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } -#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX +#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX #define MI_RETIRE_CYCLES (8) // Retire a page with no more used blocks From 37b43e4cea3e8e07734d8118c11c3950a3bd26b1 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 10:37:33 -0700 Subject: [PATCH 043/352] improved arena cache --- src/arena.c | 258 +++++++++++++++--------------------- src/bitmap.inc.c | 20 ++- test/main-override-static.c | 2 +- 3 files changed, 127 insertions(+), 153 deletions(-) diff --git a/src/arena.c b/src/arena.c index 55f09a60..c12e60a1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,186 +127,146 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (256) -#define MI_MAX_NUMA (4) - -#define MI_SLOT_IN_USE ((void*)1) +#define MI_CACHE_FIELDS (8) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit typedef struct mi_cache_slot_s { - volatile _Atomic(void*) p; - volatile size_t memid; - volatile mi_msecs_t expire; - volatile bool is_committed; - volatile bool is_large; + void* p; + size_t memid; + mi_msecs_t expire; + bool is_committed; // TODO: use bit from p to reduce size? } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 -static volatile _Atomic(uintptr_t) cache_count[MI_MAX_NUMA]; // = 0 +static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +#define BITS_SET() (UINTPTR_MAX) +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! 
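+// (Descriptive note, for exposition: every cache slot is guarded by one bit in each of the
+//  three bitmaps declared here. In `cache_inuse` a 0-bit means the slot is free for a push;
+//  in `cache_available`/`cache_available_large` a 0-bit means the slot holds a block that a
+//  pop may claim. A push claims a 0-bit in `cache_inuse`, fills the slot, and then clears the
+//  bit in `cache_available` (or `cache_available_large` for large-page memory) to publish it;
+//  a pop claims a 0-bit in one of the available bitmaps, empties the slot, and finally clears
+//  the `cache_inuse` bit so the slot can be reused. Searches start at a bitmap field derived
+//  from the NUMA node and wrap around, so slots for different nodes tend to cluster in
+//  different fields.)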
+static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; +static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -typedef union mi_cache_count_u { - uintptr_t value; - struct { - int16_t count; // at most `count` elements in the cache -#if MI_INTPTR_SIZE > 4 - uint32_t epoch; // each push/pop increase this -#else - uint16_t epoch; -#endif - } x; -} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; - // set numa range - int numa_min = numa_node; - int numa_max = numa_min; - if (numa_node < 0) { - numa_min = 0; - numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; - } - else { - if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - numa_min = numa_max = numa_node; + // numa node determines start field + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // find a free slot - mi_cache_slot_t* slot; - for (int n = numa_min; n <= numa_max; n++) { - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[n]); - int16_t count = top.x.count; - for (int16_t i = count - 1; i >= 0; i--) { - slot = &cache[n][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p); - if (p == NULL) { - if (count > 0) { count = i; } - } - else if (p > MI_SLOT_IN_USE) { // not NULL or 1 - if (count >= 0 && count < top.x.count) { // new lower bound? - mi_cache_count_t newtop = { 0 }; - newtop.x.count = count; - newtop.x.epoch = top.x.epoch + 1; - mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans - } - count = -1; // don't try to set lower bound again - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed - if (!*large && slot->is_large) { - // back out again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again - } - else { - // keep it - *memid = slot->memid; - *large = slot->is_large; - *is_zero = false; - bool committed = slot->is_committed; - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free - if (*commit && !committed) { - bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; - } - else { - *commit = committed; - } - return p; - } - } - } - } + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = false; + if (*large) { // large allowed? 
+ claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; } - return NULL; + if (!claimed) { + claimed = mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_zero = false; + bool committed = slot->is_committed; + slot->p = NULL; + slot->expire = 0; + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; + } + else { + *commit = committed; + } + + // mark the slot as free again + mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; } static void mi_cache_purge(mi_os_tld_t* tld) { - // TODO: for each numa node instead? - // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; - + UNUSED(tld); mi_msecs_t now = _mi_clock_now(); - int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - int purged = 0; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int i = 0; i < top.x.count; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { - mi_msecs_t expire = slot->expire; - if (expire != 0 && now >= expire) { - // expired, try to claim it - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed! 
test again - if (slot->is_committed && !slot->is_large && now >= slot->expire) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; - } - // and unclaim again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); - purged++; - if (purged >= 4) break; // limit to at most 4 decommits per push + size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start + size_t purged = 0; + for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + if (slot->expire != 0 && now >= slot->expire) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + if (slot->expire != 0 && now >= slot->expire) { // safe read + // still expired, decommit it + slot->expire = 0; + mi_assert_internal(slot->is_committed && !slot->is_large); + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; } + mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } + if (purged > 4) break; // bound to no more than 4 purge tries per push } } } - static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { - mi_cache_purge(tld); - // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - // try to add it to the cache + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p == NULL) { // free slot - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { - // claimed! 
- // first try to increase the top bound - mi_cache_count_t newtop = { 0 }; - newtop.x.count = i+1; - newtop.x.epoch = top.x.epoch + 1; - while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - if (top.x.count > newtop.x.count) break; // another push max'd it - newtop.x.epoch = top.x.epoch + 1; // otherwise try again - } - // set the slot - slot->expire = 0; - slot->is_committed = is_committed; - slot->memid = memid; - slot->is_large = is_large; - if (is_committed) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0 && !is_large) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; - } - else { - slot->expire = _mi_clock_now() + delay; - } - } - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; - return true; - } + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_cache_purge(tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; + slot->memid = memid; + slot->expire = 0; + slot->is_committed = is_committed; + if (is_committed && !is_large) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; } } - return false; + + // make it available + mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index c3813a44..a107545f 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -42,6 +42,11 @@ static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx return (idx*MI_BITMAP_FIELD_BITS) + bitidx; } +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) { + return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS); +} + // Get the field index from a bit index. static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { return (bitmap_idx / MI_BITMAP_FIELD_BITS); @@ -177,11 +182,13 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx return false; } - // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. 
-static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { - for (size_t idx = 0; idx < bitmap_fields; idx++) { +static inline bool mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } @@ -189,6 +196,13 @@ static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fi return false; } + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { + return mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); +} + // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { diff --git a/test/main-override-static.c b/test/main-override-static.c index 1ab11385..3ec02bdf 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -184,7 +184,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); - block_overflow1(); + //block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24); From b8846f7a27b7000c826df5abebb3268d58b8f459 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 10:51:10 -0700 Subject: [PATCH 044/352] fix unprotect of guard pages --- include/mimalloc-types.h | 2 +- src/arena.c | 2 +- src/segment.c | 7 ++++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 12a420c2..35bb1502 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -33,7 +33,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. 
(may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode diff --git a/src/arena.c b/src/arena.c index c12e60a1..85d5fd3f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -210,7 +210,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { if (slot->expire != 0 && now >= slot->expire) { // safe read // still expired, decommit it slot->expire = 0; - mi_assert_internal(slot->is_committed && !slot->is_large); + mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); slot->is_committed = false; diff --git a/src/segment.c b/src/segment.c index 6728ef3b..4d9d6809 100644 --- a/src/segment.c +++ b/src/segment.c @@ -273,7 +273,12 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (MI_SECURE>0) { - _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set + // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set + // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted + size_t os_page_size = _mi_os_page_size(); + _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_page_size, os_page_size); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_page_size; + _mi_os_unprotect(end, os_page_size); } // purge delayed decommits now? (no, leave it to the cache) From 66e5484c1c3f7853710e41ba05afc91e8025c4c4 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 11:23:25 -0700 Subject: [PATCH 045/352] fix assertions for huge pages in secure mode --- src/arena.c | 2 +- src/segment.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 85d5fd3f..5215f934 100644 --- a/src/arena.c +++ b/src/arena.c @@ -157,7 +157,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co } // find an available slot - mi_bitmap_index_t bitidx; + mi_bitmap_index_t bitidx = 0; bool claimed = false; if (*large) { // large allowed? 
claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); diff --git a/src/segment.c b/src/segment.c index 4d9d6809..9a59c878 100644 --- a/src/segment.c +++ b/src/segment.c @@ -164,10 +164,12 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } else { // free range of slices; only last slice needs a valid back offset mi_slice_t* last = &segment->slices[maxindex]; - mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); + if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= segment->slice_entries) { + mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); + } mi_assert_internal(slice == last || last->slice_count == 0 ); mi_assert_internal(last->xblock_size == 0 || (segment->kind==MI_SEGMENT_HUGE && last->xblock_size==1)); - if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandonded + if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandoned sq = mi_span_queue_for(slice->slice_count,tld); mi_assert_internal(mi_span_queue_contains(sq,slice)); } @@ -713,6 +715,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); // set up guard pages + size_t guard_slices = 0; if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data @@ -723,6 +726,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ mi_segment_ensure_committed(segment, end, os_page_size, tld->stats); _mi_os_protect(end, os_page_size); if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( + guard_slices = 1; } // reserve first slices for segment info @@ -737,7 +741,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } else { mi_assert_internal(huge_page!=NULL); - *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld); + *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); } mi_assert_expensive(mi_segment_is_valid(segment,tld)); From 18d697a1e6abd4713558bc687677f21ae0e4ba49 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 11:57:33 -0700 Subject: [PATCH 046/352] roll back to old arena cache as it seems to do better on AMD --- src/arena.c | 265 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 155 insertions(+), 110 deletions(-) diff --git a/src/arena.c b/src/arena.c index 5215f934..db30e8fc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,146 +127,191 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_FIELDS (8) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit + + +/* ----------------------------------------------------------- + Arena cache +----------------------------------------------------------- */ +#define MI_CACHE_MAX (256) +#define MI_MAX_NUMA (4) + +#define MI_SLOT_IN_USE ((void*)1) typedef struct mi_cache_slot_s { - void* p; - size_t memid; - mi_msecs_t expire; - bool is_committed; // TODO: use bit from p to reduce size? 
+ volatile _Atomic(void*)p; + volatile size_t memid; + volatile mi_msecs_t expire; + volatile bool is_committed; + volatile bool is_large; } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 - -#define BITS_SET() (UINTPTR_MAX) -static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! -static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; -static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free +static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 +static volatile _Atomic(uintptr_t)cache_count[MI_MAX_NUMA]; // = 0 +typedef union mi_cache_count_u { + uintptr_t value; + struct { + int16_t count; // at most `count` elements in the cache +#if MI_INTPTR_SIZE > 4 + uint32_t epoch; // each push/pop increase this +#else + uint16_t epoch; +#endif + } x; +} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; - // numa node determines start field - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - if (*large) { // large allowed? - claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_zero = false; - bool committed = slot->is_committed; - slot->p = NULL; - slot->expire = 0; - if (*commit && !committed) { - bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; + // set numa range + int numa_min = numa_node; + int numa_max = numa_min; + if (numa_node < 0) { + numa_min = 0; + numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; } else { - *commit = committed; + if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + numa_min = numa_max = numa_node; } - // mark the slot as free again - mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; + // find a free slot + mi_cache_slot_t* slot; + for (int n = numa_min; n <= numa_max; n++) { + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[n]); + int16_t count = top.x.count; + for (int16_t i = count - 1; i >= 0; i--) { + slot = &cache[n][i]; + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); + if (p == NULL) { + if (count > 0) { count = i; } + } + else if (p > MI_SLOT_IN_USE) { // not NULL or 1 + if (count >= 0 && count < top.x.count) { // new lower bound? 
+ mi_cache_count_t newtop = { 0 }; + newtop.x.count = count; + newtop.x.epoch = top.x.epoch + 1; + mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans + } + count = -1; // don't try to set lower bound again + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { + // claimed + if (!*large && slot->is_large) { + // back out again + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again + } + else { + // keep it + *memid = slot->memid; + *large = slot->is_large; + *is_zero = false; + bool committed = slot->is_committed; + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; + } + else { + *commit = committed; + } + return p; + } + } + } + } + } + return NULL; } static void mi_cache_purge(mi_os_tld_t* tld) { - UNUSED(tld); + // TODO: for each numa node instead? + // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; + mi_msecs_t now = _mi_clock_now(); - size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start - size_t purged = 0; - for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - if (slot->expire != 0 && now >= slot->expire) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it - if (slot->expire != 0 && now >= slot->expire) { // safe read - // still expired, decommit it - slot->expire = 0; - mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; + int numa_node = _mi_os_numa_node(NULL); + if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + mi_cache_slot_t* slot; + int purged = 0; + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + for (int i = 0; i < top.x.count; i++) { + slot = &cache[numa_node][i]; + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); + if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { + mi_msecs_t expire = slot->expire; + if (expire != 0 && now >= expire) { + // expired, try to claim it + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { + // claimed! 
test again + if (slot->is_committed && !slot->is_large && now >= slot->expire) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; + } + // and unclaim again + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); + purged++; + if (purged >= 4) break; // limit to at most 4 decommits per push } - mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } - if (purged > 4) break; // bound to no more than 4 purge tries per push } } } -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) -{ - // only for segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - // purge expired entries +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) +{ mi_cache_purge(tld); - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; + // only for segment blocks + if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - mi_assert_internal(mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - slot->expire = 0; - slot->is_committed = is_committed; - if (is_committed && !is_large) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; - } - else { - slot->expire = _mi_clock_now() + delay; + // try to add it to the cache + int numa_node = _mi_os_numa_node(NULL); + if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + mi_cache_slot_t* slot; + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { + slot = &cache[numa_node][i]; + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); + if (p == NULL) { // free slot + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { + // claimed! 
+ // first try to increase the top bound + mi_cache_count_t newtop = { 0 }; + newtop.x.count = i+1; + newtop.x.epoch = top.x.epoch + 1; + while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + if (top.x.count > newtop.x.count) break; // another push max'd it + newtop.x.epoch = top.x.epoch + 1; // otherwise try again + } + // set the slot + slot->expire = 0; + slot->is_committed = is_committed; + slot->memid = memid; + slot->is_large = is_large; + if (is_committed) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0 && !is_large) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; + } + } + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; + return true; + } } } - - // make it available - mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); - return true; + return false; } From 69158f2c76ef3ce2b61c8f0326ce02d451cc9c5d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 12:04:36 -0700 Subject: [PATCH 047/352] roll back again to new arena cache: previous perf regression was caused due to accidentally making secure mode default --- include/mimalloc-types.h | 2 +- src/arena.c | 266 ++++++++++++++++----------------------- 2 files changed, 112 insertions(+), 156 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 35bb1502..12a420c2 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -33,7 +33,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode diff --git a/src/arena.c b/src/arena.c index db30e8fc..b946ae64 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,190 +128,146 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena cache ----------------------------------------------------------- */ - -/* ----------------------------------------------------------- - Arena cache ------------------------------------------------------------ */ -#define MI_CACHE_MAX (256) -#define MI_MAX_NUMA (4) - -#define MI_SLOT_IN_USE ((void*)1) +#define MI_CACHE_FIELDS (8) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit typedef struct mi_cache_slot_s { - volatile _Atomic(void*)p; - volatile size_t memid; - volatile mi_msecs_t expire; - volatile bool is_committed; - volatile bool is_large; + void* p; + size_t memid; + mi_msecs_t expire; + bool is_committed; // TODO: use bit from p to reduce size? } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 -static volatile _Atomic(uintptr_t)cache_count[MI_MAX_NUMA]; // = 0 +static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +#define BITS_SET() (UINTPTR_MAX) +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! 
+static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; +static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -typedef union mi_cache_count_u { - uintptr_t value; - struct { - int16_t count; // at most `count` elements in the cache -#if MI_INTPTR_SIZE > 4 - uint32_t epoch; // each push/pop increase this -#else - uint16_t epoch; -#endif - } x; -} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; - // set numa range - int numa_min = numa_node; - int numa_max = numa_min; - if (numa_node < 0) { - numa_min = 0; - numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; - } - else { - if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - numa_min = numa_max = numa_node; + // numa node determines start field + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // find a free slot - mi_cache_slot_t* slot; - for (int n = numa_min; n <= numa_max; n++) { - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[n]); - int16_t count = top.x.count; - for (int16_t i = count - 1; i >= 0; i--) { - slot = &cache[n][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p == NULL) { - if (count > 0) { count = i; } - } - else if (p > MI_SLOT_IN_USE) { // not NULL or 1 - if (count >= 0 && count < top.x.count) { // new lower bound? - mi_cache_count_t newtop = { 0 }; - newtop.x.count = count; - newtop.x.epoch = top.x.epoch + 1; - mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans - } - count = -1; // don't try to set lower bound again - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed - if (!*large && slot->is_large) { - // back out again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again - } - else { - // keep it - *memid = slot->memid; - *large = slot->is_large; - *is_zero = false; - bool committed = slot->is_committed; - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free - if (*commit && !committed) { - bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; - } - else { - *commit = committed; - } - return p; - } - } - } - } + // find an available slot + mi_bitmap_index_t bitidx = 0; + bool claimed = false; + if (*large) { // large allowed? 
+ claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; } - return NULL; + if (!claimed) { + claimed = mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_zero = false; + bool committed = slot->is_committed; + slot->p = NULL; + slot->expire = 0; + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; + } + else { + *commit = committed; + } + + // mark the slot as free again + mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; } static void mi_cache_purge(mi_os_tld_t* tld) { - // TODO: for each numa node instead? - // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; - + UNUSED(tld); mi_msecs_t now = _mi_clock_now(); - int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - int purged = 0; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int i = 0; i < top.x.count; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { - mi_msecs_t expire = slot->expire; - if (expire != 0 && now >= expire) { - // expired, try to claim it - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed! 
test again - if (slot->is_committed && !slot->is_large && now >= slot->expire) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; - } - // and unclaim again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); - purged++; - if (purged >= 4) break; // limit to at most 4 decommits per push + size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start + size_t purged = 0; + for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + if (slot->expire != 0 && now >= slot->expire) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + if (slot->expire != 0 && now >= slot->expire) { // safe read + // still expired, decommit it + slot->expire = 0; + mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; } + mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } + if (purged > 4) break; // bound to no more than 4 purge tries per push } } } - -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { - mi_cache_purge(tld); - // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // try to add it to the cache + + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p == NULL) { // free slot - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { - // claimed! 
- // first try to increase the top bound - mi_cache_count_t newtop = { 0 }; - newtop.x.count = i+1; - newtop.x.epoch = top.x.epoch + 1; - while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - if (top.x.count > newtop.x.count) break; // another push max'd it - newtop.x.epoch = top.x.epoch + 1; // otherwise try again - } - // set the slot - slot->expire = 0; - slot->is_committed = is_committed; - slot->memid = memid; - slot->is_large = is_large; - if (is_committed) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0 && !is_large) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; - } - else { - slot->expire = _mi_clock_now() + delay; - } - } - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; - return true; - } + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_cache_purge(tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; + slot->memid = memid; + slot->expire = 0; + slot->is_committed = is_committed; + if (is_committed && !is_large) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; } } - return false; + + // make it available + mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; } From a4b7baf6fd745d40883b5be9017105e318b3afd3 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 2 May 2020 18:08:31 -0700 Subject: [PATCH 048/352] Update readme with descriptions of secure and debug mode --- readme.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/readme.md b/readme.md index 583d54ed..fd600763 100644 --- a/readme.md +++ b/readme.md @@ -255,6 +255,32 @@ OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 +## Secure Mode + +_mimalloc_ can be build in secure mode by using the `-DMI_SECURE=ON` flags in `cmake`. This build enables various mitigations +to make mimalloc more robust against exploits. 
In particular: + +- All internal mimalloc pages are surrounded by guard pages and the heap metadata is behind a guard page as well (so a buffer overflow + exploit cannot reach into the metadata), +- All free list pointers are + [encoded](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) + with per-page keys which is used both to prevent overwrites with a known pointer, as well as to detect heap corruption, +- Double free's are detected (and ignored), +- The free lists are initialized in a random order and allocation randomly chooses between extension and reuse within a page to + mitigate against attacks that rely on a predicable allocation order. Similarly, the larger heap blocks allocated by mimalloc + from the OS are also address randomized. + +As always, evaluate with care as part of an overall security strategy as all of the above are mitigations but not guarantees. + +## Debug Mode + +When _mimalloc_ is built using debug mode, various checks are done at runtime to catch development errors. + +- Statistics are maintained in detail for each object size. They can be shown using `MIMALLOC_SHOW_STATS=1` at runtime. +- All objects have padding at the end to detect (byte precise) heap block overflows. +- Double free's, and freeing invalid heap pointers are detected. +- Corrupted free-lists and some forms of use-after-free are detected. + # Overriding Malloc From ea92fb2fe4c21118eac145615dfade37ec22fc92 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 21:40:14 -0700 Subject: [PATCH 049/352] lower arena reset delay --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 382dd65e..1616e9a6 100644 --- a/src/options.c +++ b/src/options.c @@ -81,7 +81,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds + { 250, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output From e5b72cdfe7dfcb495a5a2a6e4a0a0bbf0b9d8058 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 22:22:35 -0700 Subject: [PATCH 050/352] reduce segment size and increase cache --- include/mimalloc-types.h | 2 +- src/arena.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 12a420c2..211ecfec 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -100,7 +100,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 16mb +#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 8mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb diff --git a/src/arena.c b/src/arena.c index b946ae64..351d9fb5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,8 +128,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_FIELDS (8) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit +#define MI_CACHE_FIELDS (16) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit typedef struct mi_cache_slot_s { void* p; @@ -141,8 +141,8 @@ typedef struct mi_cache_slot_s { static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 #define BITS_SET() (UINTPTR_MAX) -static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! -static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT16(BITS_SET) }; // zero bit = available! +static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT16(BITS_SET) }; static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free From 28f4f1ce04b1438824429dc37b749d9a0ca42005 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 May 2020 10:45:46 -0700 Subject: [PATCH 051/352] nice cache initialization --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 351d9fb5..87474bcd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,8 +128,9 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_FIELDS (16) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit +#define MI_CACHE_FIELDS (16) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit +#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) typedef struct mi_cache_slot_s { void* p; @@ -141,8 +142,8 @@ typedef struct mi_cache_slot_s { static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 #define BITS_SET() (UINTPTR_MAX) -static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT16(BITS_SET) }; // zero bit = available! -static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT16(BITS_SET) }; +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! 
+static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free From 30799bce73d5abbbf088ec5804758f2adf7d5323 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 May 2020 11:42:38 -0700 Subject: [PATCH 052/352] fix assertion for huge segments --- src/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index 9a59c878..83f6c3de 100644 --- a/src/segment.c +++ b/src/segment.c @@ -164,7 +164,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } else { // free range of slices; only last slice needs a valid back offset mi_slice_t* last = &segment->slices[maxindex]; - if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= segment->slice_entries) { + if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= (segment->slice_entries - segment->segment_info_slices)) { mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); } mi_assert_internal(slice == last || last->slice_count == 0 ); @@ -424,7 +424,7 @@ static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ } static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (!segment->allow_decommit) return; // TODO: check option_decommit? + if (!segment->allow_decommit) return; if (segment->commit_mask == 1) return; // fully decommitted if (mi_option_get(mi_option_reset_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); @@ -698,7 +698,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!commit_info_still_good) { segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed - segment->allow_decommit = mi_option_is_enabled(mi_option_allow_decommit); + segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_fixed); segment->decommit_expire = 0; segment->decommit_mask = 0; } From cce998a835a180715f52f0ddfc62f9877923e7ec Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 May 2020 11:42:49 -0700 Subject: [PATCH 053/352] fix assertion for huge blocks --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 7c7b5dc1..eb736fdb 100644 --- a/src/page.c +++ b/src/page.c @@ -783,7 +783,7 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = (is_huge ? 
NULL : mi_page_queue(heap, block_size)); mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); if (page != NULL) { - const size_t bsize = mi_page_usable_block_size(page); + const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(bsize >= size); From 74ea69b7847debd1e5e78909234bc3795fa36c4b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 May 2020 16:33:29 -0700 Subject: [PATCH 054/352] increase default arena reset delay (behaves better on 36+ core systems) --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 1616e9a6..c87492fd 100644 --- a/src/options.c +++ b/src/options.c @@ -81,7 +81,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 250, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds + { 500, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output From 82e29f47b38b11c7fefac98882e90cfa9cb5b80d Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 18 May 2020 18:51:06 -0700 Subject: [PATCH 055/352] weaken assertion, #245 --- src/alloc-aligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 7eeb9e92..45c7167a 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -54,7 +54,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t // .. and align within the allocation uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); - mi_assert_internal(adjust % sizeof(uintptr_t) == 0); + mi_assert_internal(adjust >= alignment); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); From e4ddc750697763613aa8c2e7862905489451a657 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 28 Aug 2020 08:46:51 -0700 Subject: [PATCH 056/352] set delayed decommit mask more precisely to only decommit currently committed blocks --- include/mimalloc-types.h | 2 +- src/segment.c | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 211ecfec..01e087b9 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -264,7 +264,7 @@ typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; -// Segments are large allocated memory blocks (2mb on 64 bit) from +// Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. 
typedef struct mi_segment_s { diff --git a/src/segment.c b/src/segment.c index 66c0be04..72df9c70 100644 --- a/src/segment.c +++ b/src/segment.c @@ -137,6 +137,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); + mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -414,8 +415,9 @@ static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (commit && (segment->decommit_mask & mask) != 0) { segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } - // always undo delayed decommits - segment->decommit_mask &= ~mask; + // always undo delayed decommits + segment->decommit_mask &= ~mask; + mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); } static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { @@ -425,19 +427,19 @@ static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_decommit) return; - if (segment->commit_mask == 1) return; // fully decommitted + if (segment->commit_mask == 1) return; // fully decommitted (1 = the initial segment metadata span) if (mi_option_get(mi_option_reset_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } else { - // create mask + // register for future decommit in the decommit mask uint8_t* start; size_t full_size; uintptr_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); if (mask==0 || full_size==0) return; // update delayed commit - segment->decommit_mask |= mask; + segment->decommit_mask |= (mask & segment->commit_mask); // only decommit what is committed; span_free may try to decommit more segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } } From c1a834e8865d4b05f931cea85f8a65c3fc48a0a7 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 28 Aug 2020 10:40:46 -0700 Subject: [PATCH 057/352] add checks for when memory commit fails to return NULL --- src/segment.c | 79 ++++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/src/segment.c b/src/segment.c index 72df9c70..42919851 100644 --- a/src/segment.c +++ b/src/segment.c @@ -196,39 +196,19 @@ static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } +static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t* page_size) +{ + ptrdiff_t idx = slice - segment->slices; + size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + if (page_size != NULL) *page_size = psize; + return (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); +} + // Start of the page available memory; can be used on uninitialized pages uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { const mi_slice_t* slice = 
mi_page_to_slice((mi_page_t*)page); - ptrdiff_t idx = slice - segment->slices; - size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; - uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); - /* - if (idx == 0) { - // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; - psize -= segment->segment_info_size; - - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - // to ensure this, we over-estimate and align with the OS page size - const size_t asize = _mi_os_page_size(); - uint8_t* q = (uint8_t*)_mi_align_up((uintptr_t)p, _mi_os_page_size()); - if (p < q) { - psize -= (q - p); - p = q; - } - mi_assert_internal((uintptr_t)p % _mi_os_page_size() == 0); - } - */ - /* TODO: guard pages between every slice span - if (MI_SECURE > 1 || (MI_SECURE == 1 && slice == &segment->slices[segment->slice_entries - 1])) { - // secure == 1: the last page has an os guard page at the end - // secure > 1: every page has an os guard page - psize -= _mi_os_page_size(); - } - */ - - if (page_size != NULL) *page_size = psize; + uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page_size); mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; @@ -394,21 +374,21 @@ static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative return mask; } -static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { +static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { // commit liberal, but decommit conservative uint8_t* start; size_t full_size; uintptr_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); - if (mask==0 || full_size==0) return; + if (mask==0 || full_size==0) return true; if (commit && (segment->commit_mask & mask) != mask) { bool is_zero = false; - _mi_os_commit(start,full_size,&is_zero,stats); + if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; segment->commit_mask |= mask; } else if (!commit && (segment->commit_mask & mask) != 0) { mi_assert_internal((void*)start != (void*)segment); - _mi_os_decommit(start, full_size, stats); + _mi_os_decommit(start, full_size, stats); // ok if this fails segment->commit_mask &= ~mask; } // increase expiration of reusing part of the delayed decommit @@ -418,11 +398,12 @@ static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s // always undo delayed decommits segment->decommit_mask &= ~mask; mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); + return true; } -static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (~segment->commit_mask == 0 && segment->decommit_mask==0) return; // fully committed - mi_segment_commitx(segment,true,p,size,stats); +static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (~segment->commit_mask == 0 && segment->decommit_mask==0) return true; // fully committed + return mi_segment_commitx(segment,true,p,size,stats); } static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { @@ -580,11 +561,18 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz slice->slice_count = (uint32_t)slice_count; } - 
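(An illustrative summary, not part of patch 057: it condenses how a failed OS commit now propagates out of the allocation path, using the functions changed in this hunk and the next; error-handling details are elided.)

  // _mi_os_commit fails
  //   -> mi_segment_commitx returns false
  //   -> mi_segment_ensure_committed returns false
  //   -> mi_segment_span_allocate returns NULL (it commits before touching any slice data)
  //   -> mi_segments_page_find_and_allocate coalesces the span back and returns NULL to its caller
  if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, NULL),
                                   slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) {
    return NULL;  // commit failed; every caller must now handle a NULL page
  }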
+// Note: may still return NULL if committing the memory failed static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_slice_t* slice = &segment->slices[slice_index]; mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); + + // commit before changing the slice data + if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) { + return NULL; // commit failed! + } + + // convert the slices to a page slice->slice_offset = 0; slice->slice_count = (uint32_t)slice_count; mi_assert_internal(slice->slice_count == slice_count); @@ -611,9 +599,8 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i last->slice_count = 0; last->xblock_size = 1; } - - // ensure the memory is committed - mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + + // and initialize the page page->is_reset = false; page->is_committed = true; segment->used++; @@ -635,7 +622,13 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm mi_segment_slice_split(segment, slice, slice_count, tld); } mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); - return mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); + mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); + if (page == NULL) { + // commit failed; return NULL but first restore the slice + mi_segment_span_free_coalesce(slice, tld); + return NULL; + } + return page; } } sq++; @@ -732,7 +725,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } // reserve first slices for segment info - mi_segment_span_allocate(segment, 0, info_slices, tld); + mi_page_t* page0 = mi_segment_span_allocate(segment, 0, info_slices, tld); + mi_assert_internal(page0!=NULL); if (page0==NULL) return NULL; // cannot fail as we always commit in advance mi_assert_internal(segment->used == 1); segment->used = 0; // don't count our internal slices towards usage @@ -744,6 +738,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ else { mi_assert_internal(huge_page!=NULL); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); + mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance } mi_assert_expensive(mi_segment_is_valid(segment,tld)); From 228b5f6e9df525bfa22c808dce28653f58f92b1c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Sep 2020 12:19:04 -0700 Subject: [PATCH 058/352] use atomic load for segment map --- src/segment.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/segment.c b/src/segment.c index 2c559f04..c9fb364e 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1392,7 +1392,7 @@ static void mi_segment_map_allocated_at(const mi_segment_t* segment) { size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); if (index==0) return; - uintptr_t mask = mi_segment_map[index]; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { newmask = (mask | ((uintptr_t)1 << bitidx)); @@ -1404,7 +1404,7 @@ static void mi_segment_map_freed_at(const mi_segment_t* 
segment) { size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); if (index == 0) return; - uintptr_t mask = mi_segment_map[index]; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { newmask = (mask & ~((uintptr_t)1 << bitidx)); @@ -1417,7 +1417,8 @@ static mi_segment_t* _mi_segment_of(const void* p) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge - if (mi_likely((mi_segment_map[index] & ((uintptr_t)1 << bitidx)) != 0)) { + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { return segment; // yes, allocated by us } if (index==0) return NULL; @@ -1427,16 +1428,17 @@ static mi_segment_t* _mi_segment_of(const void* p) { // note: we could maintain a lowest index to speed up the path for invalid pointers? size_t lobitidx; size_t loindex; - uintptr_t lobits = mi_segment_map[index] & (((uintptr_t)1 << bitidx) - 1); + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); if (lobits != 0) { loindex = index; lobitidx = _mi_bsr(lobits); } else { + uintptr_t lomask = mask; loindex = index - 1; - while (loindex > 0 && mi_segment_map[loindex] == 0) loindex--; + while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; if (loindex==0) return NULL; - lobitidx = _mi_bsr(mi_segment_map[loindex]); + lobitidx = _mi_bsr(lomask); } // take difference as the addresses could be larger than the MAX_ADDRESS space. size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; From 7058e501cbdfc181a69456643915e4d0718fff0e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 3 Sep 2020 13:53:56 -0700 Subject: [PATCH 059/352] use atomic ops for the expire field; passes TSAN now --- src/arena.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8f4e1783..959d59c5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -133,10 +133,10 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* #define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) typedef struct mi_cache_slot_s { - void* p; - size_t memid; - mi_msecs_t expire; - bool is_committed; // TODO: use bit from p to reduce size? + void* p; + size_t memid; + _Atomic(mi_msecs_t) expire; + bool is_committed; // TODO: use bit from p to reduce size? 
} mi_cache_slot_t; static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 @@ -179,7 +179,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co *is_zero = false; bool committed = slot->is_committed; slot->p = NULL; - slot->expire = 0; + mi_atomic_store_release(&slot->expire,0); if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); @@ -203,15 +203,17 @@ static void mi_cache_purge(mi_os_tld_t* tld) { for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; - if (slot->expire != 0 && now >= slot->expire) { // racy read + mi_msecs_t expire = mi_atomic_load_relaxed(&slot->expire); + if (expire != 0 && now >= expire) { // racy read // seems expired, first claim it from available purged++; mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // was available, we claimed it - if (slot->expire != 0 && now >= slot->expire) { // safe read + expire = mi_atomic_load_acquire(&slot->expire); + if (expire != 0 && now >= expire) { // safe read // still expired, decommit it - slot->expire = 0; + mi_atomic_store_relaxed(&slot->expire,0); mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); @@ -252,7 +254,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; slot->p = start; slot->memid = memid; - slot->expire = 0; + mi_atomic_store_relaxed(&slot->expire,0); slot->is_committed = is_committed; if (is_committed && !is_large) { long delay = mi_option_get(mi_option_arena_reset_delay); @@ -262,7 +264,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit slot->is_committed = false; } else { - slot->expire = _mi_clock_now() + delay; + mi_atomic_store_release(&slot->expire, _mi_clock_now() + delay); } } From f6109765d822d686fa94466ddf26480f78d4a40c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Sep 2020 15:04:40 -0700 Subject: [PATCH 060/352] update whitespace and comments --- include/mimalloc-types.h | 2 +- src/arena.c | 8 ++++---- src/region.c | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 931d3270..6af46d18 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -122,7 +122,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // 64kb on 64-bit -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32mb on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 4mb on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) diff --git a/src/arena.c b/src/arena.c index 959d59c5..6a15c83d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +large blocks (>= MI_ARENA_BLOCK_SIZE, 8MiB). In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. @@ -55,9 +55,9 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // size in count of arena blocks. typedef uintptr_t mi_block_info_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_ALIGN // 64MiB -#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 4GiB -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB +#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_ALIGN // 8MiB +#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 512MiB +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor diff --git a/src/region.c b/src/region.c index e916e452..db2871d6 100644 --- a/src/region.c +++ b/src/region.c @@ -88,12 +88,12 @@ typedef union mi_region_info_u { typedef struct mem_region_s { _Atomic(uintptr_t) info; // mi_region_info_t.value _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block + mi_bitmap_field_t in_use; // bit per in-use block + mi_bitmap_field_t dirty; // track if non-zero per block + mi_bitmap_field_t commit; // track if committed per block + mi_bitmap_field_t reset; // track if reset per block _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena - uintptr_t padding; // round to 8 fields + uintptr_t padding; // round to 8 fields } mem_region_t; // The region map From b22401deb3e97b24952c5b6191af79fa11733246 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Sep 2020 20:31:11 -0700 Subject: [PATCH 061/352] layout --- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 6a15c83d..c56c7b8e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -69,10 +69,10 @@ typedef struct mi_arena_s { bool is_zero_init; // is the arena zero initialized? bool is_committed; // is the memory committed bool is_large; // large OS page allocated - _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? 
- mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) + mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; From dc858f6d29c6288f49f2f26bd14a220a617b3a4c Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 09:23:22 -0700 Subject: [PATCH 062/352] fix c++ compilation with new atomics for dev-slice --- ide/vs2017/mimalloc-override.vcxproj | 8 -------- src/arena.c | 6 +++--- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 50a950b9..e48aa4d8 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -95,11 +95,7 @@ true true ../../include -<<<<<<< HEAD - MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); -======= _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); ->>>>>>> dev MultiThreadedDebugDLL false Default @@ -127,11 +123,7 @@ true true ../../include -<<<<<<< HEAD - MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); -======= _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); ->>>>>>> dev MultiThreadedDebugDLL false Default diff --git a/src/arena.c b/src/arena.c index c56c7b8e..731d4e20 100644 --- a/src/arena.c +++ b/src/arena.c @@ -179,7 +179,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co *is_zero = false; bool committed = slot->is_committed; slot->p = NULL; - mi_atomic_store_release(&slot->expire,0); + mi_atomic_store_release(&slot->expire,(mi_msecs_t)0); if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); @@ -213,7 +213,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { expire = mi_atomic_load_acquire(&slot->expire); if (expire != 0 && now >= expire) { // safe read // still expired, decommit it - mi_atomic_store_relaxed(&slot->expire,0); + mi_atomic_store_relaxed(&slot->expire,(mi_msecs_t)0); mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); @@ -254,7 +254,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; slot->p = start; slot->memid = memid; - mi_atomic_store_relaxed(&slot->expire,0); + mi_atomic_store_relaxed(&slot->expire,(mi_msecs_t)0); slot->is_committed = is_committed; if (is_committed && !is_large) { long delay = mi_option_get(mi_option_arena_reset_delay); From 4df01218e2f136d5eaaa443023331902dda51da5 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 10:03:37 -0700 Subject: [PATCH 063/352] fix msvc compilation with new atomics --- include/mimalloc-atomic.h | 34 ++++++++++++++++++++++++++++++++++ include/mimalloc-types.h | 1 - src/arena.c | 12 ++++++------ test/main-override-static.c | 3 +++ 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index e6f4ba0d..bb9430b0 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -106,6 +106,13 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* 
nothing */ }; } +// Used by timers +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) + + #elif defined(_MSC_VER) @@ -189,6 +196,27 @@ static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_atomic_exchange_explicit(p,x,mo); #endif } +static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)* p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_X64) + return *p; +#else + int64_t old = *p; + int64_t x = old; + while ((old = InterlockedCompareExchange64(p, x, old)) != x) { + x = old; + } + return x; +#endif +} +static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)* p, int64_t x, mi_memory_order mo) { + (void)(mo); +#if defined(x_M_IX86) || defined(_M_X64) + *p = x; +#else + InterlockedExchange64(p,x); +#endif +} // These are used by the statistics static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) { @@ -222,6 +250,12 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) +#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed)) + + #endif diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d482240b..7270f798 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -280,7 +280,6 @@ typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; - // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. 
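(A usage sketch, not part of patch 063: it shows the intent of the new `mi_atomic_loadi64_*`/`mi_atomic_storei64_*` helpers using the cache-slot expiration pattern from the arena.c hunk that follows; the surrounding bitmap claiming is elided.)

  // field: _Atomic(mi_msecs_t) expire;  -- written with release, read relaxed for probing
  mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);   // cheap racy pre-check
  if (expire != 0 && now >= expire) {
    // ... claim the slot in the `cache_available` bitmap first ...
    expire = mi_atomic_loadi64_acquire(&slot->expire);            // safe re-read after claiming
    if (expire != 0 && now >= expire) {
      mi_atomic_storei64_relaxed(&slot->expire, (mi_msecs_t)0);   // mark the slot as handled
      // ... decommit the cached block ...
    }
  }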
diff --git a/src/arena.c b/src/arena.c index 731d4e20..c7f38c2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -179,7 +179,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co *is_zero = false; bool committed = slot->is_committed; slot->p = NULL; - mi_atomic_store_release(&slot->expire,(mi_msecs_t)0); + mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); @@ -203,17 +203,17 @@ static void mi_cache_purge(mi_os_tld_t* tld) { for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_load_relaxed(&slot->expire); + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); if (expire != 0 && now >= expire) { // racy read // seems expired, first claim it from available purged++; mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // was available, we claimed it - expire = mi_atomic_load_acquire(&slot->expire); + expire = mi_atomic_loadi64_acquire(&slot->expire); if (expire != 0 && now >= expire) { // safe read // still expired, decommit it - mi_atomic_store_relaxed(&slot->expire,(mi_msecs_t)0); + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); @@ -254,7 +254,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; slot->p = start; slot->memid = memid; - mi_atomic_store_relaxed(&slot->expire,(mi_msecs_t)0); + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->is_committed = is_committed; if (is_committed && !is_large) { long delay = mi_option_get(mi_option_arena_reset_delay); @@ -264,7 +264,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit slot->is_committed = false; } else { - mi_atomic_store_release(&slot->expire, _mi_clock_now() + delay); + mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); } } diff --git a/test/main-override-static.c b/test/main-override-static.c index ca65a0b2..0067be04 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -49,6 +49,7 @@ static inline uint8_t mi_bsr32(uint32_t x) { } #endif +/* // Bit scan reverse: return the index of the highest bit. 
uint8_t _mi_bsr(uintptr_t x) { if (x == 0) return 0; @@ -61,6 +62,8 @@ uint8_t _mi_bsr(uintptr_t x) { # error "define bsr for non-32 or 64-bit platforms" #endif } +*/ + static inline size_t _mi_wsize_from_size(size_t size) { return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); From 13bbb78907cb7276b729c36671602ccdf97dcf94 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 11:48:23 -0700 Subject: [PATCH 064/352] add dev-slice to azure test pipeline --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 68c150df..d0e27ffd 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,6 +6,7 @@ trigger: - master - dev +- dev-slice jobs: - job: From 3d708aa7e1f9801e1a860a9d00eef5bd8492d95e Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:16:46 -0700 Subject: [PATCH 065/352] fix warning in g++ --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index c7f38c2c..f946bfa9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -141,7 +141,7 @@ typedef struct mi_cache_slot_s { static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 -#define BITS_SET() (UINTPTR_MAX) +#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free From 85a8c138fcbb861dd947aa2bfc905a4cdfcc663f Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:18:09 -0700 Subject: [PATCH 066/352] enable verbose ctest on mac pipeline --- azure-pipelines.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d0e27ffd..48c8da98 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -126,7 +126,11 @@ jobs: cmakeArgs: .. 
$(cmakeExtraArgs) - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) displayName: Make - - script: make test -C $(BuildType) + #- script: make test -C $(BuildType) + # displayName: CTest + - script: | + cd $(BuildType) + ctest --verbose --timeout 120 displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From a0370f347cddfb4f39a43bca81f83edc8e01d023 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:20:21 -0700 Subject: [PATCH 067/352] more verbose ctest on mac pipeline --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 48c8da98..a82e6c32 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -130,6 +130,7 @@ jobs: # displayName: CTest - script: | cd $(BuildType) + export MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) From 0c5f03559d476b48467c1b0796cc4792dfe88f51 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:22:52 -0700 Subject: [PATCH 068/352] fix verbose ctest on mac pipeline --- azure-pipelines.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a82e6c32..17961c62 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -130,8 +130,7 @@ jobs: # displayName: CTest - script: | cd $(BuildType) - export MIMALLOC_VERBOSE=1 - ctest --verbose --timeout 120 + MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 5fe80671a24327b5b6f12fa8b521f95bba4400a6 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:26:47 -0700 Subject: [PATCH 069/352] again try to fix verbose ctest on mac pipeline --- azure-pipelines.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 17961c62..7fc80a5e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -128,9 +128,8 @@ jobs: displayName: Make #- script: make test -C $(BuildType) # displayName: CTest - - script: | - cd $(BuildType) - MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 + - script: MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 + workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 7a08ca4dc6b687fa2b9351aa338a6aba43449861 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:30:13 -0700 Subject: [PATCH 070/352] again try to fix verbose ctest on mac pipeline --- azure-pipelines.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7fc80a5e..56f00790 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -128,8 +128,11 @@ jobs: displayName: Make #- script: make test -C $(BuildType) # displayName: CTest - - script: MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 + - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress workingDirectory: $(BuildType) - displayName: CTest + displayName: TestStress + - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api + workingDirectory: $(BuildType) + displayName: TestAPI # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 8834fe02da49b42b31fe2dd2d5e02a90dfc7ad14 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:31:28 -0700 Subject: [PATCH 071/352] again try to fix verbose ctest on mac pipeline --- 
azure-pipelines.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 56f00790..b0933cd3 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -128,11 +128,11 @@ jobs: displayName: Make #- script: make test -C $(BuildType) # displayName: CTest - - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress - workingDirectory: $(BuildType) - displayName: TestStress - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api workingDirectory: $(BuildType) displayName: TestAPI + - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress + workingDirectory: $(BuildType) + displayName: TestStress # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 63a9f45ba64e44162fa0de41030e0d012c5bcbba Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 19:39:10 -0700 Subject: [PATCH 072/352] add initial mi_commit_mask abstraction --- include/mimalloc-internal.h | 76 +++++++++++++++++++++++++++++++++++++ include/mimalloc-types.h | 10 +++-- src/os.c | 1 + src/segment.c | 61 +++++++++++++---------------- 4 files changed, 109 insertions(+), 39 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index a113b121..bcced4cb 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -650,6 +650,82 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } + +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +static inline mi_commit_mask_t mi_commit_mask_empty(void) { + return 0; +} + +static inline mi_commit_mask_t mi_commit_mask_full(void) { + return ~mi_commit_mask_empty(); +} + +static inline mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { + mi_assert_internal(bitidx < MI_INTPTR_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_INTPTR_BITS); + if (bitcount == MI_INTPTR_BITS) { + mi_assert_internal(bitidx==0); + return mi_commit_mask_full(); + } + else if (bitcount == 0) { + return mi_commit_mask_empty(); + } + else { + return (((uintptr_t)1 << bitcount) - 1) << bitidx; + } +} + +static inline bool mi_commit_mask_is_empty(mi_commit_mask_t mask) { + return (mask == 0); +} + +static inline bool mi_commit_mask_is_full(mi_commit_mask_t mask) { + return (~mask == 0); +} + +static inline bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return ((commit & mask) == mask); +} + +static inline bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return ((commit & mask) != 0); +} + +static mi_decl_nodiscard inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return (commit & mask); +} + +static inline void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { + *commit = *commit & ~mask; +} + +static inline void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { + *commit = *commit | mask; +} + +#define mi_commit_mask_foreach(mask,idx,count) \ + idx = 0; \ + while (mask != 0) { \ + /* count ones */ \ + count = 0; \ + while ((mask&1)==1) { \ + mask >>= 1; \ + count++; \ + } \ + /* if found, do action */ \ + if (count > 0) { + +#define mi_commit_mask_foreach_end() \ + } \ + idx += count; \ + /* shift out the zero */ \ + mask >>= 1; \ + idx++; \ + } + // ------------------------------------------------------------------- // Fast "random" shuffle // 
------------------------------------------------------------------- diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 7270f798..06ab1ebe 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -276,9 +276,11 @@ typedef enum mi_segment_kind_e { #error "not enough commit bits to cover the segment size" #endif -typedef mi_page_t mi_slice_t; +typedef mi_page_t mi_slice_t; -typedef int64_t mi_msecs_t; +typedef int64_t mi_msecs_t; + +typedef uintptr_t mi_commit_mask_t; // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that @@ -290,8 +292,8 @@ typedef struct mi_segment_s { bool allow_decommit; mi_msecs_t decommit_expire; - uintptr_t decommit_mask; - uintptr_t commit_mask; + mi_commit_mask_t decommit_mask; + mi_commit_mask_t commit_mask; _Atomic(struct mi_segment_s*) abandoned_next; diff --git a/src/os.c b/src/os.c index cddc5b51..5c38989d 100644 --- a/src/os.c +++ b/src/os.c @@ -624,6 +624,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld) { + UNUSED(tld); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); diff --git a/src/segment.c b/src/segment.c index ac6d1a79..34fdf0bd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -268,8 +268,8 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, - (~segment->commit_mask == 0 && segment->decommit_mask == 0), segment->mem_is_fixed, tld->os); + bool fully_committed = (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, fully_committed, segment->mem_is_fixed, tld->os); } @@ -339,7 +339,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Span management ----------------------------------------------------------- */ -static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size) { +static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size) { mi_assert_internal(_mi_ptr_segment(p) == segment); if (size == 0 || size > MI_SEGMENT_SIZE) return 0; if (p >= (uint8_t*)segment + mi_segment_size(segment)) return 0; @@ -370,39 +370,38 @@ static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative } mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); - uintptr_t mask = (((uintptr_t)1 << bitcount) - 1) << bitidx; - return mask; + return mi_commit_mask_create(bitidx, bitcount); } static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { // commit liberal, but decommit conservative uint8_t* start; size_t full_size; - uintptr_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); - if (mask==0 || full_size==0) return true; + mi_commit_mask_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); + if (mi_commit_mask_is_empty(mask) || full_size==0) return true; - if (commit && (segment->commit_mask & 
mask) != mask) { + if (commit && !mi_commit_mask_all_set(segment->commit_mask, mask)) { bool is_zero = false; if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - segment->commit_mask |= mask; + mi_commit_mask_set(&segment->commit_mask,mask); } - else if (!commit && (segment->commit_mask & mask) != 0) { + else if (!commit && mi_commit_mask_any_set(segment->commit_mask,mask)) { mi_assert_internal((void*)start != (void*)segment); _mi_os_decommit(start, full_size, stats); // ok if this fails - segment->commit_mask &= ~mask; + mi_commit_mask_clear(&segment->commit_mask, mask); } // increase expiration of reusing part of the delayed decommit - if (commit && (segment->decommit_mask & mask) != 0) { + if (commit && mi_commit_mask_any_set(segment->decommit_mask, mask)) { segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } // always undo delayed decommits - segment->decommit_mask &= ~mask; + mi_commit_mask_clear(&segment->decommit_mask, mask); mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (~segment->commit_mask == 0 && segment->decommit_mask==0) return true; // fully committed + if (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)) return true; // fully committed return mi_segment_commitx(segment,true,p,size,stats); } @@ -416,44 +415,36 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ // register for future decommit in the decommit mask uint8_t* start; size_t full_size; - uintptr_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); - if (mask==0 || full_size==0) return; + mi_commit_mask_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); + if (mi_commit_mask_is_empty(mask) || full_size==0) return; // update delayed commit - segment->decommit_mask |= (mask & segment->commit_mask); // only decommit what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->decommit_mask, mi_commit_mask_intersect(mask,segment->commit_mask)); // only decommit what is committed; span_free may try to decommit more segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } } static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (segment->decommit_mask == 0) return; + if (mi_commit_mask_is_empty(segment->decommit_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->decommit_expire) return; - uintptr_t mask = segment->decommit_mask; + mi_commit_mask_t mask = segment->decommit_mask; segment->decommit_expire = 0; - segment->decommit_mask = 0; + segment->decommit_mask = mi_commit_mask_empty(); - uintptr_t idx = 0; - while (mask != 0) { - // count ones - size_t count = 0; - while ((mask&1)==1) { - mask >>= 1; - count++; - } + uintptr_t idx; + uintptr_t count; + mi_commit_mask_foreach(mask, idx, count) { // if found, decommit that sequence if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); size_t size = count * MI_COMMIT_SIZE; mi_segment_commitx(segment, false, p, size, stats); - idx += count; } - // shift out the 0 - mask >>= 1; - idx++; } - mi_assert_internal(segment->decommit_mask == 0); + mi_commit_mask_foreach_end() + mi_assert_internal(mi_commit_mask_is_empty(segment->decommit_mask)); } @@ -693,10 
+684,10 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } if (!commit_info_still_good) { - segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed + segment->commit_mask = (!commit ? 0x01 : mi_commit_mask_full()); // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_fixed); segment->decommit_expire = 0; - segment->decommit_mask = 0; + segment->decommit_mask = mi_commit_mask_empty(); } // initialize segment info From f7dc4847f26533ba6bf1a59b6955db4b497a35d7 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 21:58:32 -0700 Subject: [PATCH 073/352] keep commit_mask live in the cache for better reuse --- include/mimalloc-internal.h | 39 +++++++++---- src/arena.c | 109 ++++++++++++++++++++++++++---------- src/options.c | 4 +- src/segment.c | 29 ++++++---- 4 files changed, 126 insertions(+), 55 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bcced4cb..8004ba84 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -71,9 +71,9 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) size_t _mi_os_good_alloc_size(size_t size); // arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld); // "segment.c" @@ -655,6 +655,8 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // commit mask // ------------------------------------------------------------------- +#define MI_COMMIT_MASK_BITS (sizeof(mi_commit_mask_t)*8) + static inline mi_commit_mask_t mi_commit_mask_empty(void) { return 0; } @@ -664,9 +666,9 @@ static inline mi_commit_mask_t mi_commit_mask_full(void) { } static inline mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { - mi_assert_internal(bitidx < MI_INTPTR_BITS); - mi_assert_internal((bitidx + bitcount) <= MI_INTPTR_BITS); - if (bitcount == MI_INTPTR_BITS) { + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + if (bitcount == MI_COMMIT_MASK_BITS) { mi_assert_internal(bitidx==0); return mi_commit_mask_full(); } @@ -683,7 +685,7 @@ static inline bool mi_commit_mask_is_empty(mi_commit_mask_t mask) { } static inline bool mi_commit_mask_is_full(mi_commit_mask_t mask) { - return (~mask == 0); + return ((~mask) == 0); } static inline bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { @@ -694,18 +696,35 @@ static inline bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mas return ((commit & mask) != 0); } -static mi_decl_nodiscard inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) 
{ +mi_decl_nodiscard static inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { return (commit & mask); } static inline void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = *commit & ~mask; + *commit = (*commit) & (~mask); } static inline void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = *commit | mask; + *commit = (*commit) | mask; } +static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total) { + if (mi_commit_mask_is_full(mask)) { + return total; + } + else if (mi_commit_mask_is_empty(mask)) { + return 0; + } + else { + size_t count = 0; + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + return (total/MI_COMMIT_MASK_BITS)*count; + } +} + + #define mi_commit_mask_foreach(mask,idx,count) \ idx = 0; \ while (mask != 0) { \ diff --git a/src/arena.c b/src/arena.c index f946bfa9..e7ea7bb1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -135,8 +135,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* typedef struct mi_cache_slot_s { void* p; size_t memid; + mi_commit_mask_t commit_mask; _Atomic(mi_msecs_t) expire; - bool is_committed; // TODO: use bit from p to reduce size? } mi_cache_slot_t; static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 @@ -147,7 +147,10 @@ static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BIT static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + UNUSED(tld); + UNUSED(commit); + // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; @@ -177,24 +180,55 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co void* p = slot->p; *memid = slot->memid; *is_zero = false; - bool committed = slot->is_committed; + mi_commit_mask_t cmask = slot->commit_mask; // copy slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - if (*commit && !committed) { + // ignore commit request + /* + if (commit && !mi_commit_mask_is_full(cmask)) { bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; + bool ok = _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); // todo: only commit needed parts? + if (!ok) { + *commit_mask = cmask; + } + else { + *commit_mask = mi_commit_mask_full(); + } } else { - *commit = committed; - } - + */ + *commit_mask = cmask; + // mark the slot as free again mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); return p; } +static void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { + if (mi_commit_mask_is_empty(*cmask)) { + // nothing + } + else if (mi_commit_mask_is_full(*cmask)) { + _mi_os_decommit(p, total, stats); + } + else { + // todo: one call to decommit the whole at once? 
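  // (illustrative note, not part of the original hunk) each bit in the commit mask
  // covers total/MI_COMMIT_MASK_BITS bytes of the block, so the partial case below
  // walks every run of set bits with mi_commit_mask_foreach and decommits each run
  // as one contiguous range: start = p + idx*part, size = count*part.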
+ mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t part = total/MI_COMMIT_MASK_BITS; + uintptr_t idx; + uintptr_t count; + mi_commit_mask_t mask = *cmask; + mi_commit_mask_foreach(mask, idx, count) { + void* start = (uint8_t*)p + (idx*part); + size_t size = count*part; + _mi_os_decommit(start, size, stats); + } + mi_commit_mask_foreach_end() + } + *cmask = mi_commit_mask_empty(); +} + static void mi_cache_purge(mi_os_tld_t* tld) { UNUSED(tld); mi_msecs_t now = _mi_clock_now(); @@ -214,10 +248,11 @@ static void mi_cache_purge(mi_os_tld_t* tld) { if (expire != 0 && now >= expire) { // safe read // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; + // decommit committed parts + mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); + //_mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); } mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } @@ -226,7 +261,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { } } -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) +static bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -255,13 +290,12 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit slot->p = start; slot->memid = memid; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->is_committed = is_committed; - if (is_committed && !is_large) { + slot->commit_mask = commit_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large) { long delay = mi_option_get(mi_option_arena_reset_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; + mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); } else { mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); @@ -311,10 +345,10 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool* commit, bool* large, bool* is_zero, + bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(commit_mask != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; @@ -339,9 +373,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } } } // try from another numa node instead.. @@ -351,43 +389,52 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } } } } // try to get from the cache - void* p = mi_cache_pop(numa_node, size, alignment, commit, large, is_zero, memid, tld); + void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); if (p != NULL) return p; // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); + p = _mi_os_alloc_aligned(size, alignment, commit, large, tld); + *commit_mask = ((p!=NULL && commit) ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, commit_mask, large, is_zero, memid, tld); } /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { +void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) { - _mi_abandoned_await_readers(); // wait unti safe to free - _mi_os_free_ex(p, size, is_committed, tld->stats); + if (!mi_cache_push(p, size, memid, commit_mask, is_large, tld)) { + _mi_abandoned_await_readers(); // wait until safe to free + // TODO: is it safe on all platforms to free even it contains decommitted parts? (eg. 
macOS) + const size_t csize = mi_commit_mask_committed_size(commit_mask, size); + _mi_stat_decrease(&_mi_stats_main.committed, csize); + _mi_os_free_ex(p, size, false /*pretend decommitted to not double count stats*/, tld->stats); } } else { diff --git a/src/options.c b/src/options.c index 5fa9e2e7..fe94a1fb 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory @@ -82,7 +82,7 @@ static mi_option_desc_t options[_mi_option_last] = #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #else - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index 34fdf0bd..7d2e12f4 100644 --- a/src/segment.c +++ b/src/segment.c @@ -137,7 +137,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -268,8 +268,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - bool fully_committed = (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, fully_committed, segment->mem_is_fixed, tld->os); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->commit_mask, segment->mem_is_fixed, tld->os); } @@ -382,11 +381,15 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (commit && !mi_commit_mask_all_set(segment->commit_mask, mask)) { bool is_zero = false; - if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; + mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); + _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(cmask, 
MI_SEGMENT_SIZE)); // adjust for overlap + if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; mi_commit_mask_set(&segment->commit_mask,mask); } else if (!commit && mi_commit_mask_any_set(segment->commit_mask,mask)) { mi_assert_internal((void*)start != (void*)segment); + mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap _mi_os_decommit(start, full_size, stats); // ok if this fails mi_commit_mask_clear(&segment->commit_mask, mask); } @@ -401,6 +404,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); if (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)) return true; // fully committed return mi_segment_commitx(segment,true,p,size,stats); } @@ -648,29 +652,30 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (required > 0); + const bool commit = eager || (required > 0); // Try to get from our cache first bool is_zero = false; const bool commit_info_still_good = (segment != NULL); + mi_commit_mask_t commit_mask = (segment != NULL ? segment->commit_mask : mi_commit_mask_empty()); if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy size_t memid = 0; - // segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld); - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); - + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &commit_mask, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - if (!commit) { + + if (!mi_commit_mask_all_set(commit_mask,mi_commit_mask_create(0, 1))) { // at least commit the info slices mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); bool ok = _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); - if (!ok) return NULL; // failed to commit + if (!ok) return NULL; // failed to commit + mi_commit_mask_set(&commit_mask,mi_commit_mask_create(0, 1)); } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = mi_commit_mask_is_full(commit_mask); mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } @@ -684,7 +689,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } if (!commit_info_still_good) { - segment->commit_mask = (!commit ? 
0x01 : mi_commit_mask_full()); // on lazy commit, the initial part is always committed + segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_fixed); segment->decommit_expire = 0; segment->decommit_mask = mi_commit_mask_empty(); From f9ca7cd05a8fa83e69198a92a12baf9f37257712 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 22:16:58 -0700 Subject: [PATCH 074/352] use proper file descriptor in mmap for decommit --- src/os.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/os.c b/src/os.c index 5c38989d..4696a4d7 100644 --- a/src/os.c +++ b/src/os.c @@ -345,6 +345,17 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr return p; } +static int mi_unix_mmap_fd(void) { +#if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) os_tag = 100; + return VM_MAKE_TAG(os_tag); +#else + return -1; +#endif +} + static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; #if !defined(MAP_ANONYMOUS) @@ -354,7 +365,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #define MAP_NORESERVE 0 #endif int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - int fd = -1; + const int fd = mi_unix_mmap_fd(); #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); @@ -365,13 +376,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #endif #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) os_tag = 100; - fd = VM_MAKE_TAG(os_tag); - #endif + #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); @@ -713,7 +718,8 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + const int fd = mi_unix_mmap_fd(); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); if (p != start) { err = errno; } } else { From a372847ccfbcc37afdcd64fb3f81f7d3d698d1cf Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 08:57:56 -0700 Subject: [PATCH 075/352] verbose ctest on Linux pipeline --- azure-pipelines.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 85529e2a..a922e569 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -100,7 +100,8 @@ jobs: cmakeArgs: .. 
$(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: make test -C $(BuildType) + - script: ctest --verbose --timeout 120 + workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-ubuntu-$(BuildType) From e703bfc3192569dcf9984e536027f60ca887100b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 09:02:15 -0700 Subject: [PATCH 076/352] build windows pipeline in parallel --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a922e569..dd544d18 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -37,6 +37,7 @@ jobs: inputs: solution: $(BuildType)/libmimalloc.sln configuration: '$(MSBuildConfiguration)' + msbuildArguments: -m - script: | cd $(BuildType) ctest --verbose --timeout 120 From e2ae9f3125b9e45025858f700e967b22a8033bce Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 09:14:32 -0700 Subject: [PATCH 077/352] fix pipeline script for macOS --- azure-pipelines.yml | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a25fa3a7..4ce98c50 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -129,20 +129,16 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) displayName: Make -<<<<<<< HEAD - #- script: make test -C $(BuildType) - # displayName: CTest - - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api - workingDirectory: $(BuildType) - displayName: TestAPI - - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress - workingDirectory: $(BuildType) - displayName: TestStress -======= + # - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api + # workingDirectory: $(BuildType) + # displayName: TestAPI + # - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress + # workingDirectory: $(BuildType) + # displayName: TestStress - script: ctest --verbose --timeout 120 workingDirectory: $(BuildType) displayName: CTest ->>>>>>> dev + # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 5ae01fe4d92de2b94bee45f7b1d3f8df322bab40 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 09:39:16 -0700 Subject: [PATCH 078/352] experiment with commit strategy on macOS --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 4696a4d7..6e8f6149 100644 --- a/src/os.c +++ b/src/os.c @@ -715,7 +715,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ } #elif defined(__wasi__) // WebAssembly guests can't control memory protection - #elif defined(MAP_FIXED) + #elif defined(MAP_FIXED) && !defined(__APPLE__) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) const int fd = mi_unix_mmap_fd(); From 828613a6942e6891bff514d42198d890114c3296 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 12:06:56 -0700 Subject: [PATCH 079/352] use MADV_DONTNEED for commit/decommit on macOS --- src/os.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/os.c b/src/os.c index 6e8f6149..13785b65 100644 --- a/src/os.c +++ b/src/os.c @@ -716,20 +716,35 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #elif defined(__wasi__) // WebAssembly guests can't control memory protection #elif defined(MAP_FIXED) && !defined(__APPLE__) + // Linux if (!commit) { - // use mmap with MAP_FIXED to 
discard the existing memory (and reduce commit charge) + // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) const int fd = mi_unix_mmap_fd(); void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); if (p != start) { err = errno; } } else { - // for commit, just change the protection + // commit: just change the protection err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } } #else - err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); - if (err != 0) { err = errno; } + // MacOS and others. + if (!commit) { + #if defined(MADV_DONTNEED) + // decommit: use MADV_DONTNEED as it decrease rss immediately (unlike MADV_FREE) + err = madvise(start, csize, MADV_DONTNEED); + #else + // decommit: just disable access + err = mprotect(start, csize, PROT_NONE); + if (err != 0) { err = errno; } + #endif + } + else { + // commit: ensure we can access the area + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } #endif if (err != 0) { _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); From 8e0d846b40fa44225227e01531c6b45a1f6f79a8 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 12:19:05 -0700 Subject: [PATCH 080/352] consistent commit order --- src/os.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/os.c b/src/os.c index 13785b65..7cede199 100644 --- a/src/os.c +++ b/src/os.c @@ -704,8 +704,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #if defined(_WIN32) if (commit) { - // if the memory was already committed, the call succeeds but it is not zero'd - // *is_zero = true; + // *is_zero = true; // note: if the memory was already committed, the call succeeds but the memory is not zero'd void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); err = (p == start ? 0 : GetLastError()); } @@ -717,22 +716,27 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ // WebAssembly guests can't control memory protection #elif defined(MAP_FIXED) && !defined(__APPLE__) // Linux - if (!commit) { + if (commit) { + // commit: just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) const int fd = mi_unix_mmap_fd(); void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); if (p != start) { err = errno; } } - else { - // commit: just change the protection + #else + // macOSX and others. + if (commit) { + // commit: ensure we can access the area err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } - } - #else - // MacOS and others. 
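/* A minimal, self-contained sketch (not mimalloc's implementation; the helper
   names below are made up for illustration) of the reserve/commit/decommit
   pattern that the hunks above implement on Unix-like systems: reserve address
   space with mmap(PROT_NONE), commit by enabling access with mprotect, and
   decommit either by re-mapping the range with MAP_FIXED (Linux) or with
   madvise(MADV_DONTNEED) (macOS and others), which releases the physical pages
   while keeping the address range reserved. */
#include <sys/mman.h>
#include <stddef.h>
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif

static void* example_reserve(size_t size) {
  // reserve address space only; no physical memory is committed yet
  void* p = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  return (p == MAP_FAILED ? NULL : p);
}

static int example_commit(void* p, size_t size) {
  // commit: make the pages read/write accessible
  return mprotect(p, size, PROT_READ | PROT_WRITE);
}

static int example_decommit(void* p, size_t size) {
#if defined(MADV_DONTNEED)
  // decommit: drop the physical pages but keep the reservation
  return madvise(p, size, MADV_DONTNEED);
#else
  // fallback: at least make the range inaccessible
  return mprotect(p, size, PROT_NONE);
#endif
}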
- if (!commit) { + } + else { #if defined(MADV_DONTNEED) - // decommit: use MADV_DONTNEED as it decrease rss immediately (unlike MADV_FREE) + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = madvise(start, csize, MADV_DONTNEED); #else // decommit: just disable access @@ -740,11 +744,6 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ if (err != 0) { err = errno; } #endif } - else { - // commit: ensure we can access the area - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } #endif if (err != 0) { _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); From 45300ac43d500836fa496167fbf64b84be7b65f3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 13:24:47 -0700 Subject: [PATCH 081/352] merge from dev --- include/mimalloc-atomic.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index b25a0ac3..b6506075 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -217,27 +217,6 @@ static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi InterlockedExchange64(p, x); #endif } -static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)* p, mi_memory_order mo) { - (void)(mo); -#if defined(_M_X64) - return *p; -#else - int64_t old = *p; - int64_t x = old; - while ((old = InterlockedCompareExchange64(p, x, old)) != x) { - x = old; - } - return x; -#endif -} -static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)* p, int64_t x, mi_memory_order mo) { - (void)(mo); -#if defined(x_M_IX86) || defined(_M_X64) - *p = x; -#else - InterlockedExchange64(p,x); -#endif -} // These are used by the statistics static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { From 38261322409d813e8fd7edbc22501bd4790363de Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 14:51:20 -0700 Subject: [PATCH 082/352] use dynamic initial commit --- src/segment.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/segment.c b/src/segment.c index 7d2e12f4..d5c8a6ec 100644 --- a/src/segment.c +++ b/src/segment.c @@ -411,7 +411,6 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_decommit) return; - if (segment->commit_mask == 1) return; // fully decommitted (1 = the initial segment metadata span) if (mi_option_get(mi_option_reset_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } @@ -666,12 +665,14 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (segment == NULL) return NULL; // failed to allocate mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - if (!mi_commit_mask_all_set(commit_mask,mi_commit_mask_create(0, 1))) { + const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_assert_internal(commit_needed>0); + if (!mi_commit_mask_all_set(commit_mask,mi_commit_mask_create(0, commit_needed))) { // at least commit the info slices - mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); - bool ok = _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); + mi_assert_internal(commit_needed*MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); + 
bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, &is_zero, tld->stats); if (!ok) return NULL; // failed to commit - mi_commit_mask_set(&commit_mask,mi_commit_mask_create(0, 1)); + mi_commit_mask_set(&commit_mask,mi_commit_mask_create(0, commit_needed)); } segment->memid = memid; segment->mem_is_fixed = mem_large; From 953bbde089ec4e163dabf508f2f77d34920f600e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 15:09:51 -0700 Subject: [PATCH 083/352] fix is_in_same_page check --- include/mimalloc-internal.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 8004ba84..1575c1fc 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -585,7 +585,11 @@ static inline bool mi_is_in_same_segment(const void* p, const void* q) { static inline bool mi_is_in_same_page(const void* p, const void* q) { mi_segment_t* segment = _mi_ptr_segment(p); if (_mi_ptr_segment(q) != segment) return false; - return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q)); + // assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q)); + mi_page_t* page = _mi_segment_page_of(segment, p); + size_t psize; + uint8_t* start = _mi_segment_page_start(segment, page, &psize); + return (start <= (uint8_t*)q && (uint8_t*)q < start + psize); } static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { From 313008ecaa27b016e6bd78e1a83ebafe5fe381bb Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Sep 2020 15:20:59 -0700 Subject: [PATCH 084/352] ensure page->retire_expire is always 1 --- src/page.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 9d919cfa..d9b5412d 100644 --- a/src/page.c +++ b/src/page.c @@ -389,6 +389,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } +// Retire parameters #define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX #define MI_RETIRE_CYCLES (8) @@ -415,7 +416,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; From 6b013d5f38b9349fd21c35e49c90bef83612060f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Sep 2020 22:55:36 -0700 Subject: [PATCH 085/352] test for arena count early; skip test in bitmap_mask_ for perf --- src/arena.c | 78 ++++++++++++++++++++++-------------------------- src/bitmap.inc.c | 5 ++-- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/src/arena.c b/src/arena.c index 3556f6e4..1ed782ef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -8,23 +8,18 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 8MiB). +large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). 
In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. Currently arenas are only used to for huge OS page (1GiB) reservations, -otherwise it delegates to direct allocation from the OS. +or direct OS memory reservations -- otherwise it delegates to direct allocation from the OS. In the future, we can expose an API to manually add more kinds of arenas which is sometimes needed for embedded devices or shared memory for example. (We can also employ this with WASI or `sbrk` systems to reserve large arenas on demand and be able to reuse them efficiently). -The arena allocation needs to be thread safe and we use an atomic -bitmap to allocate. The current implementation of the bitmap can -only do this within a field (`uintptr_t`) so we can allocate at most -blocks of 2GiB (64*32MiB) and no object can cross the boundary. This -can lead to fragmentation but fortunately most objects will be regions -of 256MiB in practice. +The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -55,7 +50,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // size in count of arena blocks. typedef uintptr_t mi_block_info_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_ALIGN // 8MiB +#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_SIZE // 8MiB #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) @@ -352,43 +347,42 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, const int numa_node = _mi_os_numa_node(tld); // current numa node // try to allocate in an arena if the alignment is small enough - // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && - // size <= MI_ARENA_MAX_OBJ_SIZE && - size >= MI_ARENA_MIN_OBJ_SIZE) - { - const size_t bcount = mi_block_count_of_size(size); - - mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); - // try numa affine allocation - for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? + // and the object is not too large or too small. + if (alignment <= MI_SEGMENT_ALIGN && size >= MI_ARENA_MIN_OBJ_SIZE) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + if (mi_unlikely(max_arena > 0)) { + const size_t bcount = mi_block_count_of_size(size); + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + // try numa affine allocation + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - *commit_mask = (acommit ? 
mi_commit_mask_full() : mi_commit_mask_empty()); - return p; + { + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } } } - } - // try from another numa node instead.. - for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! + // try from another numa node instead.. + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; + { + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } } } } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 07f48277..1ee17556 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -67,8 +67,8 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); mi_assert_internal(count > 0); - if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; - if (count == 0) return 0; + //if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; + //if (count == 0) return 0; return ((((uintptr_t)1 << count) - 1) << bitidx); } @@ -145,6 +145,7 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx { mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); + mi_assert_internal(count > 0); _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_load_relaxed(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut From 97629cefaa53dbd05d142d6d4886d9b3093bd22d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 11:12:23 -0700 Subject: [PATCH 086/352] tune performance options with longer reset delay --- src/options.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/options.c b/src/options.c index fe94a1fb..ecbbf30d 100644 --- a/src/options.c +++ b/src/options.c @@ -81,12 +81,14 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed -#else +#elif defined(_WIN32) { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) +#else + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly 
committed (but per page in the segment on demand) #endif - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed - { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 500, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output From 161f9a7751c29e6930430fba850f0f7cdda5583d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 11:12:44 -0700 Subject: [PATCH 087/352] refactor arena allocation --- src/arena.c | 115 ++++++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 52 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8af6f9b9..e0524b77 100644 --- a/src/arena.c +++ b/src/arena.c @@ -136,7 +136,7 @@ static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BIT static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +static mi_decl_noinline void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { UNUSED(tld); UNUSED(commit); @@ -194,7 +194,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool com return p; } -static void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { +static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { if (mi_commit_mask_is_empty(*cmask)) { // nothing } @@ -218,7 +218,7 @@ static void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t tot *cmask = mi_commit_mask_empty(); } -static void mi_cache_purge(mi_os_tld_t* tld) { +static mi_decl_noinline void mi_cache_purge(mi_os_tld_t* tld) { UNUSED(tld); mi_msecs_t now = _mi_clock_now(); size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start @@ -250,7 +250,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { } } -static bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) +static mi_decl_noinline bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -301,8 +301,8 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask Arena Allocation ----------------------------------------------------------- */ -static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t 
needed_bcount, + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; @@ -333,6 +333,52 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + UNUSED_RELEASE(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + const size_t bcount = mi_block_count_of_size(size); + if (mi_likely(max_arena == 0)) return NULL; + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + + // try numa affine allocation + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } + } + } + + // try from another numa node instead.. + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } + } + } + return NULL; +} + + void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) @@ -343,60 +389,25 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, *is_zero = false; bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` + if (large==NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node - // try to allocate in an arena if the alignment is small enough - // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && size >= MI_ARENA_MIN_OBJ_SIZE) { - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - if (mi_unlikely(max_arena > 0)) { - const size_t bcount = mi_block_count_of_size(size); - mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); - // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; - } - } - } - // try from another numa node instead.. - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; - } - } - } - } + // try to get from the cache + if (size == MI_SEGMENT_SIZE && alignment <= MI_SEGMENT_ALIGN) { + void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); + if (p != NULL) return p; } - // try to get from the cache - void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); - if (p != NULL) return p; - + // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) { + void* p = mi_arena_allocate(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); + if (p != NULL) return p; + } // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - p = _mi_os_alloc_aligned(size, alignment, commit, large, tld->stats); + void* p = _mi_os_alloc_aligned(size, alignment, commit, large, tld->stats); *commit_mask = ((p!=NULL && commit) ? 
mi_commit_mask_full() : mi_commit_mask_empty()); return p; } From 037285ac0980b648d5c4ad6b359ac57d5f21e543 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 13:27:34 -0700 Subject: [PATCH 088/352] refactor segment cache and map in a separate source file --- CMakeLists.txt | 1 + ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 5 +- ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 + include/mimalloc-internal.h | 29 +- include/mimalloc-types.h | 1 + src/arena.c | 229 +------------- src/bitmap.c | 2 +- src/segment-cache.c | 310 +++++++++++++++++++ src/segment.c | 152 +-------- src/static.c | 1 + 16 files changed, 378 insertions(+), 365 deletions(-) create mode 100644 src/segment-cache.c diff --git a/CMakeLists.txt b/CMakeLists.txt index acd16a98..a9fdb259 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ set(mi_sources src/os.c src/bitmap.c src/arena.c + src/segment-cache.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index ab02a658..a87b69ac 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -246,6 +246,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 2544c06b..d01f9311 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -79,5 +79,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index f212b619..41fb77c1 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -242,6 +242,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 128126c1..05417645 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -62,6 +62,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 000958ee..4136e574 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -246,6 +246,7 @@ + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index 38e83578..d6b7b5a9 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + @@ -67,7 +70,7 @@ Header Files - Source Files + Header Files diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index b1184cb5..98dee520 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -234,6 +234,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 8d071d29..92be7cb4 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -49,6 +49,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 7ddfa38f..11733c66 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -62,19 +62,24 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, 
mi_stats_t* stats); // to free thread local data -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -size_t _mi_os_good_alloc_size(size_t size); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +size_t _mi_os_good_alloc_size(size_t size); // arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); +// "segment-cache.c" +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld); +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -463,6 +468,10 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } +// size of a segment +static inline size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; +} // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 06ab1ebe..8524de8a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -114,6 +114,7 @@ terms of the MIT license. A copy of the license can be found in the file // Derived constants #define MI_SEGMENT_SIZE (1ULL< MI_SEGMENT_ALIGN) return NULL; - - // numa node determines start field - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - if (*large) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_zero = false; - mi_commit_mask_t cmask = slot->commit_mask; // copy - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - // ignore commit request - /* - if (commit && !mi_commit_mask_is_full(cmask)) { - bool commit_zero; - bool ok = _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); // todo: only commit needed parts? - if (!ok) { - *commit_mask = cmask; - } - else { - *commit_mask = mi_commit_mask_full(); - } - } - else { - */ - *commit_mask = cmask; - - // mark the slot as free again - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { - if (mi_commit_mask_is_empty(*cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(*cmask)) { - _mi_os_decommit(p, total, stats); - } - else { - // todo: one call to decommit the whole at once? - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - uintptr_t idx; - uintptr_t count; - mi_commit_mask_t mask = *cmask; - mi_commit_mask_foreach(mask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - *cmask = mi_commit_mask_empty(); -} - -static mi_decl_noinline void mi_cache_purge(mi_os_tld_t* tld) { - UNUSED(tld); - mi_msecs_t now = _mi_clock_now(); - size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start - size_t purged = 0; - for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && now >= expire) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && now >= expire) { // safe read - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - //_mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); - } - _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (purged > 4) break; // bound to no more than 4 purge tries per push - } - } -} - -static mi_decl_noinline bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* 
tld) -{ - // only for segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // purge expired entries - mi_cache_purge(tld); - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = commit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - } - else { - mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); - return true; -} - - /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ @@ -333,7 +147,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren return p; } -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); @@ -349,11 +163,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); return p; } } @@ -366,11 +178,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) { - *commit_mask = (acommit ? 
mi_commit_mask_full() : mi_commit_mask_empty()); return p; } } @@ -379,11 +189,10 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(commit_mask != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; @@ -392,49 +201,35 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (large==NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node - // try to get from the cache - if (size == MI_SEGMENT_SIZE && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); - if (p != NULL) return p; - } - // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); + void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_zero, memid, tld); if (p != NULL) return p; } // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned(size, alignment, commit, large, tld->stats); - *commit_mask = ((p!=NULL && commit) ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); } -void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, commit_mask, large, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); } /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - if (!mi_cache_push(p, size, memid, commit_mask, is_large, tld)) { - _mi_abandoned_await_readers(); // wait until safe to free - // TODO: is it safe on all platforms to free even it contains decommitted parts? (eg. 
macOS) - const size_t csize = mi_commit_mask_committed_size(commit_mask, size); - _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_os_free_ex(p, size, false /*pretend decommitted to not double count stats*/, tld->stats); - } + _mi_os_free_ex(p, size, is_committed, tld->stats); } else { // allocated in an arena diff --git a/src/bitmap.c b/src/bitmap.c index 93487a32..ad5a9552 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -40,7 +40,7 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); diff --git a/src/segment-cache.c b/src/segment-cache.c new file mode 100644 index 00000000..569e878a --- /dev/null +++ b/src/segment-cache.c @@ -0,0 +1,310 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2020, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + Implements a cache of segments to avoid expensive OS calls + and also the full memory map of all segments. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include "bitmap.h" // atomic bitmap + +#define MI_CACHE_FIELDS (16) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit + +#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) +#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) + +typedef struct mi_cache_slot_s { + void* p; + size_t memid; + mi_commit_mask_t commit_mask; + _Atomic(mi_msecs_t) expire; +} mi_cache_slot_t; + +static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! +static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; +static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free + + +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + // only segment blocks + if (size != MI_SEGMENT_SIZE) return NULL; + + // numa node determines start field + const int numa_node = _mi_os_numa_node(tld); + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // find an available slot + mi_bitmap_index_t bitidx = 0; + bool claimed = false; + if (*large) { // large allowed? 
+ claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; + } + if (!claimed) { + claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_zero = false; + mi_commit_mask_t cmask = slot->commit_mask; // copy + slot->p = NULL; + mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); + *commit_mask = cmask; + + // mark the slot as free again + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; +} + +static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) +{ + if (mi_commit_mask_is_empty(*cmask)) { + // nothing + } + else if (mi_commit_mask_is_full(*cmask)) { + _mi_os_decommit(p, total, stats); + } + else { + // todo: one call to decommit the whole at once? + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t part = total/MI_COMMIT_MASK_BITS; + uintptr_t idx; + uintptr_t count; + mi_commit_mask_t mask = *cmask; + mi_commit_mask_foreach(mask, idx, count) { + void* start = (uint8_t*)p + (idx*part); + size_t size = count*part; + _mi_os_decommit(start, size, stats); + } + mi_commit_mask_foreach_end() + } + *cmask = mi_commit_mask_empty(); +} + +#define MI_MAX_PURGE_PER_PUSH (4) + +static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) +{ + UNUSED(tld); + mi_msecs_t now = _mi_clock_now(); + size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start + size_t purged = 0; + for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); + if (expire != 0 && now >= expire) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + expire = mi_atomic_loadi64_acquire(&slot->expire); + if (expire != 0 && now >= expire) { // safe read + // still expired, decommit it + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); + mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + _mi_abandoned_await_readers(); // wait until safe to decommit + // decommit committed parts + // TODO: instead of decommit, we could also free to the OS? 
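+          // note: decommit only the parts recorded in the slot's commit mask; mi_commit_mask_decommit also clears the mask afterwards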
+ mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); + } + _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop + } + if (purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + } + } +} + +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) +{ + // only for normal segment blocks + if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + + // numa node determines start field + int numa_node = _mi_os_numa_node(NULL); + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_segment_cache_purge(tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; + slot->memid = memid; + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); + slot->commit_mask = commit_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); + } + else { + mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); + } + } + + // make it available + _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; +} + + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) + set to 1 if it contains the segment meta data. +----------------------------------------------------------- */ + + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB +#else +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? 
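+  // note: map the segment address to a word index into mi_segment_map and a bit index within that word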
+ uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; + *bitidx = segindex % (8*MI_INTPTR_SIZE); + return (segindex / (8*MI_INTPTR_SIZE)); +} + +void _mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + if (index==0) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +void _mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + if (index == 0) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask & ~((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. +static mi_segment_t* _mi_segment_of(const void* p) { + mi_segment_t* segment = _mi_ptr_segment(p); + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { + return segment; // yes, allocated by us + } + if (index==0) return NULL; + // search downwards for the first segment in case it is an interior pointer + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // valid huge objects + // note: we could maintain a lowest index to speed up the path for invalid pointers? + size_t lobitidx; + size_t loindex; + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); + if (lobits != 0) { + loindex = index; + lobitidx = mi_bsr(lobits); // lobits != 0 + } + else { + uintptr_t lomask = mask; + loindex = index - 1; + while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; + if (loindex==0) return NULL; + lobitidx = mi_bsr(lomask); // lomask != 0 + } + // take difference as the addresses could be larger than the MAX_ADDRESS space. + size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; + segment = (mi_segment_t*)((uint8_t*)segment - diff); + + if (segment == NULL) return NULL; + mi_assert_internal((void*)segment < p); + bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(cookie_ok); + if (mi_unlikely(!cookie_ok)) return NULL; + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); + return segment; +} + +// Is this a valid pointer in our heap? 
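+// (i.e. does p point into a segment that was registered in mi_segment_map?)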
+static bool mi_is_valid_pointer(const void* p) { + return (_mi_segment_of(p) != NULL); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* +// Return the full segment range belonging to a pointer +static void* mi_segment_range_of(const void* p, size_t* size) { + mi_segment_t* segment = _mi_segment_of(p); + if (segment == NULL) { + if (size != NULL) *size = 0; + return NULL; + } + else { + if (size != NULL) *size = segment->segment_size; + return segment; + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); + return page; +} +*/ diff --git a/src/segment.c b/src/segment.c index b010fcbb..a1a38a64 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,8 +13,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_map_allocated_at(const mi_segment_t* segment); -static void mi_segment_map_freed_at(const mi_segment_t* segment); static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); /* -------------------------------------------------------------------------------- @@ -183,11 +181,6 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { Segment size calculations ----------------------------------------------------------- */ - -static size_t mi_segment_size(mi_segment_t* segment) { - return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; -} - static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } @@ -249,7 +242,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; - mi_segment_map_freed_at(segment); + _mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (MI_SECURE>0) { // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set @@ -264,7 +257,13 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->commit_mask, segment->mem_is_fixed, tld->os); + const size_t size = mi_segment_size(segment); + if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_fixed, tld->os)) { + const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); + if (csize > 0 && !segment->mem_is_fixed) _mi_stat_decrease(&_mi_stats_main.committed, csize); + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_fixed /* pretend not committed to not double count decommits */, tld->os); + } } @@ -647,7 +646,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay 
= (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - const bool commit = eager || (required > 0); + bool commit = eager || (required > 0); // Try to get from our cache first bool is_zero = false; @@ -657,8 +656,12 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy size_t memid = 0; - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &commit_mask, &mem_large, &is_zero, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &mem_large, &is_zero, &memid, os_tld); + if (segment==NULL) { + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); + if (segment == NULL) return NULL; // failed to allocate + commit_mask = (commit ? mi_commit_mask_full() : mi_commit_mask_empty()); + } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); @@ -674,7 +677,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->mem_is_fixed = mem_large; segment->mem_is_committed = mi_commit_mask_is_full(commit_mask); mi_segments_track_size((long)(segment_size), tld); - mi_segment_map_allocated_at(segment); + _mi_segment_map_allocated_at(segment); } // zero the segment info? -- not always needed as it is zero initialized from the OS @@ -1368,126 +1371,3 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment } -/* ----------------------------------------------------------- - The following functions are to reliably find the segment or - block that encompasses any pointer p (or NULL if it is not - in any of our segments). - We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) - set to 1 if it contains the segment meta data. ------------------------------------------------------------ */ - - -#if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB -#else -#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb -#endif - -#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) -#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) -#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) - -static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments - -static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? 
- uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; - *bitidx = segindex % (8*MI_INTPTR_SIZE); - return (segindex / (8*MI_INTPTR_SIZE)); -} - -static void mi_segment_map_allocated_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index==0) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask | ((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -static void mi_segment_map_freed_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index == 0) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask & ~((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. -static mi_segment_t* _mi_segment_of(const void* p) { - mi_segment_t* segment = _mi_ptr_segment(p); - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge - const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { - return segment; // yes, allocated by us - } - if (index==0) return NULL; - // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough - // valid huge objects - // note: we could maintain a lowest index to speed up the path for invalid pointers? - size_t lobitidx; - size_t loindex; - uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); - if (lobits != 0) { - loindex = index; - lobitidx = mi_bsr(lobits); // lobits != 0 - } - else { - uintptr_t lomask = mask; - loindex = index - 1; - while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; - if (loindex==0) return NULL; - lobitidx = mi_bsr(lomask); // lomask != 0 - } - // take difference as the addresses could be larger than the MAX_ADDRESS space. - size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; - segment = (mi_segment_t*)((uint8_t*)segment - diff); - - if (segment == NULL) return NULL; - mi_assert_internal((void*)segment < p); - bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(cookie_ok); - if (mi_unlikely(!cookie_ok)) return NULL; - if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range - mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); - return segment; -} - -// Is this a valid pointer in our heap? 
-static bool mi_is_valid_pointer(const void* p) { - return (_mi_segment_of(p) != NULL); -} - -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return mi_is_valid_pointer(p); -} - -/* -// Return the full segment range belonging to a pointer -static void* mi_segment_range_of(const void* p, size_t* size) { - mi_segment_t* segment = _mi_segment_of(p); - if (segment == NULL) { - if (size != NULL) *size = 0; - return NULL; - } - else { - if (size != NULL) *size = segment->segment_size; - return segment; - } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); - mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); - mi_reset_delayed(tld); - mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); - return page; -} -*/ diff --git a/src/static.c b/src/static.c index e53aff1d..79c0a033 100644 --- a/src/static.c +++ b/src/static.c @@ -25,6 +25,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "os.c" #include "bitmap.c" #include "arena.c" +#include "segment-cache.c" #include "segment.c" #include "page.c" #include "heap.c" From d87933a3b5f0a8b1d49b4f6ab284e061931957e6 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 15:50:37 -0700 Subject: [PATCH 089/352] update comments --- src/segment-cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 569e878a..e7369bb3 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -6,8 +6,9 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- - Implements a cache of segments to avoid expensive OS calls - and also the full memory map of all segments. + Implements a cache of segments to avoid expensive OS calls and to reuse + the commit_mask to optimize the commit/decommit calls. + The full memory map of all segments is also implemented here. 
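+  (the segment map is what mi_is_in_heap_region uses to check whether an arbitrary pointer was allocated by mimalloc)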
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" From 1d946146cc37595f277c7d781804fbc3cc49b9b7 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 11 Sep 2020 10:40:22 -0700 Subject: [PATCH 090/352] fix all_committed --- src/arena.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index 97c287d2..1f970753 100644 --- a/src/arena.c +++ b/src/arena.c @@ -228,14 +228,14 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, b Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld) { +void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - _mi_os_free_ex(p, size, is_committed, tld->stats); + _mi_os_free_ex(p, size, all_committed, tld->stats); } else { // allocated in an arena @@ -258,7 +258,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os } // potentially decommit if (arena->is_committed) { - mi_assert_internal(all_committed); + mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) } else { mi_assert_internal(arena->blocks_committed != NULL); From 01307a25ffaf6991023f770a24bce72552dfc852 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 11 Sep 2020 11:00:19 -0700 Subject: [PATCH 091/352] fix assertion --- src/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitmap.c b/src/bitmap.c index 08289264..ea55d33e 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -178,7 +178,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_load_relaxed(field); const size_t initial = mi_clz(map); // count of initial zeros starting at idx - mi_assert_internal(initial >= 0 && initial <= MI_BITMAP_FIELD_BITS); + mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries From b1cc3d550c212444cba6df8d0f12bd7cd75dd487 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 14 Sep 2020 10:42:47 -0700 Subject: [PATCH 092/352] fix valid pointer detection on mac --- src/segment-cache.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index e16c9e4a..f1ed5c06 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -74,7 +74,7 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); *commit_mask = cmask; - + // mark the slot as free again mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); @@ -85,7 +85,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo { if (mi_commit_mask_is_empty(*cmask)) { // nothing - } + } else if (mi_commit_mask_is_full(*cmask)) { 
_mi_os_decommit(p, total, stats); } @@ -108,7 +108,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo #define MI_MAX_PURGE_PER_PUSH (4) -static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) +static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) { UNUSED(tld); mi_msecs_t now = _mi_clock_now(); @@ -145,7 +145,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me { // only for normal segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); size_t start_field = 0; @@ -187,7 +187,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); } } - + // make it available _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); return true; @@ -217,9 +217,15 @@ static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB w static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? - uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; - *bitidx = segindex % (8*MI_INTPTR_SIZE); - return (segindex / (8*MI_INTPTR_SIZE)); + if ((uintptr_t)segment >= MI_MAX_ADDRESS) { + *bitidx = 0; + return 0; + } + else { + uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + *bitidx = segindex % MI_INTPTR_BITS; + return (segindex / MI_INTPTR_BITS); + } } void _mi_segment_map_allocated_at(const mi_segment_t* segment) { @@ -257,8 +263,11 @@ static mi_segment_t* _mi_segment_of(const void* p) { return segment; // yes, allocated by us } if (index==0) return NULL; + + // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? + // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough // valid huge objects // note: we could maintain a lowest index to speed up the path for invalid pointers? size_t lobitidx; From fbaa70e1eb5f7c797c4298f4d7a44f5e71ae06b2 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 14 Sep 2020 11:01:17 -0700 Subject: [PATCH 093/352] increase default test load to 25% to increase azure pipeline test load --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 271bea85..46e1eac4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -26,7 +26,7 @@ terms of the MIT license. 
// // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 10; // scaling factor +static int SCALE = 25; // scaling factor static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors From b149099bf328ef6ddb11da02bc64f67da8ff8694 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 24 Sep 2020 16:55:00 -0700 Subject: [PATCH 094/352] use relaxed load for last search position in an arena --- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1f970753..55700dc2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -103,9 +103,9 @@ static size_t mi_block_count_of_size(size_t size) { ----------------------------------------------------------- */ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { - size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search + size_t idx = mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time + mi_atomic_store_relaxed(&arena->search_idx, idx); // start search from here next time return true; }; return false; From e1c38eef76cb8bb3e49a32073c5c579afaae48fa Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 24 Sep 2020 17:20:39 -0700 Subject: [PATCH 095/352] use allow_decommit option for both the segment cache and pages --- include/mimalloc.h | 2 +- src/options.c | 4 ++-- src/segment-cache.c | 4 ++-- src/segment.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7b6d5a0b..90e7675d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -314,7 +314,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_allow_decommit, mi_option_reset_delay, - mi_option_arena_reset_delay, + mi_option_segment_decommit_delay, mi_option_use_numa_nodes, mi_option_limit_os_alloc, mi_option_os_tag, diff --git a/src/options.c b/src/options.c index f81bb8af..9705a4cc 100644 --- a/src/options.c +++ b/src/options.c @@ -88,8 +88,8 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds for freed segments + { 500, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 1000, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment-cache.c b/src/segment-cache.c index f1ed5c06..d1f49ab6 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -177,8 +177,8 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->commit_mask = commit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned) { - long delay = mi_option_get(mi_option_arena_reset_delay); + if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { + long delay = mi_option_get(mi_option_segment_decommit_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); diff --git a/src/segment.c b/src/segment.c index 8624f7e4..e64f8409 100644 --- a/src/segment.c +++ b/src/segment.c @@ -384,7 +384,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s mi_assert_internal((void*)start != (void*)segment); mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap - _mi_os_decommit(start, full_size, stats); // ok if this fails + if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); } // ok if this fails mi_commit_mask_clear(&segment->commit_mask, mask); } // increase expiration of reusing part of the delayed decommit @@ -422,7 +422,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (mi_commit_mask_is_empty(segment->decommit_mask)) return; + if (!segment->allow_decommit || mi_commit_mask_is_empty(segment->decommit_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->decommit_expire) return; From ad058291953dbe00d7a7d7f786d17f7f4c563033 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 6 Nov 2020 17:49:10 -0800 Subject: [PATCH 096/352] remove shadow warning when building in static mode --- src/segment.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/segment.c b/src/segment.c index 22aa48c6..5bbf283c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -246,10 +246,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { if (MI_SECURE>0) { // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted - size_t os_page_size = _mi_os_page_size(); - _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_page_size, os_page_size); - uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_page_size; - _mi_os_unprotect(end, os_page_size); + size_t os_pagesize = _mi_os_page_size(); + _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; + _mi_os_unprotect(end, os_pagesize); } // purge delayed decommits now? 
(no, leave it to the cache) @@ -712,12 +712,12 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data - size_t os_page_size = _mi_os_page_size(); - mi_assert_internal(mi_segment_info_size(segment) - os_page_size >= pre_size); - _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_page_size, os_page_size); - uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_page_size; - mi_segment_ensure_committed(segment, end, os_page_size, tld->stats); - _mi_os_protect(end, os_page_size); + size_t os_pagesize = _mi_os_page_size(); + mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size); + _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; + mi_segment_ensure_committed(segment, end, os_pagesize, tld->stats); + _mi_os_protect(end, os_pagesize); if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( guard_slices = 1; } From 217871cb459e4915fefcef2a45f63caa48a60fb0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 22 Jan 2021 11:24:25 -0800 Subject: [PATCH 097/352] fix search_idx start in managed arenas --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 55700dc2..0cd8aba3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -105,7 +105,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* { size_t idx = mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, idx); // start search from here next time + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; From e314699ee0de0c4a9b227970e8c2956f50a49af5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 28 Jan 2021 17:32:42 -0800 Subject: [PATCH 098/352] add debug view of arenas --- include/mimalloc.h | 1 + src/arena.c | 31 +++++++++++++++++++++++++++++-- src/segment-cache.c | 12 ++++++++++++ test/test-stress.c | 10 +++++++--- 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 904e10d4..4ecae58d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -267,6 +267,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; +mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/arena.c b/src/arena.c index 0cd8aba3..ea3a1abd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -49,7 +49,7 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. 
typedef uintptr_t mi_block_info_t; -#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_SIZE // 8MiB +#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 8MiB (must be at least MI_SEGMENT_ALIGN) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) @@ -103,7 +103,7 @@ static size_t mi_block_count_of_size(size_t size) { ----------------------------------------------------------- */ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { - size_t idx = mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter + size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; @@ -346,6 +346,33 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe return 0; } +static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) { + size_t inuse_count = 0; + for (size_t i = 0; i < field_count; i++) { + char buf[MI_BITMAP_FIELD_BITS + 1]; + uintptr_t field = mi_atomic_load_relaxed(&fields[i]); + for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) { + bool inuse = ((((uintptr_t)1 << bit) & field) != 0); + if (inuse) inuse_count++; + buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 'x' : '.'); + } + buf[MI_BITMAP_FIELD_BITS] = 0; + _mi_verbose_message("%s%s\n", prefix, buf); + } + return inuse_count; +} + +void mi_debug_show_arenas(void) mi_attr_noexcept { + size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arenas; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; + size_t inuse_count = 0; + _mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count); + inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count); + _mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count); + } +} /* ----------------------------------------------------------- Reserve a huge page arena. diff --git a/src/segment-cache.c b/src/segment-cache.c index d1f49ab6..08517f4b 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -16,6 +16,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "bitmap.h" // atomic bitmap +//#define MI_CACHE_DISABLE 1 + #define MI_CACHE_FIELDS (16) #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit @@ -39,6 +41,10 @@ static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +#ifdef MI_CACHE_DISABLE + return NULL; +#else + // only segment blocks if (size != MI_SEGMENT_SIZE) return NULL; @@ -79,6 +85,7 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); return p; +#endif } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) @@ -143,6 +150,10 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { +#ifdef MI_CACHE_DISABLE + return false; +#else + // only for normal segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -191,6 +202,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me // make it available _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); return true; +#endif } diff --git a/test/test-stress.c b/test/test-stress.c index c69c08cc..fde0c791 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -181,14 +181,15 @@ static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); static void test_stress(void) { uintptr_t r = rand(); for (int n = 0; n < ITER; n++) { - run_os_threads(THREADS, &stress); + run_os_threads(THREADS, &stress); for (int i = 0; i < TRANSFERS; i++) { if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers void* p = atomic_exchange_ptr(&transfer[i], NULL); free_items(p); } } - // mi_collect(false); + //mi_collect(false); + //mi_debug_show_arenas(); #if !defined(NDEBUG) || defined(MI_TSAN) if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif @@ -243,7 +244,9 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. 
srand(0x7feb352d); - + + //mi_reserve_os_memory(512ULL << 20, true, true); + #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) mi_stats_reset(); #endif @@ -256,6 +259,7 @@ int main(int argc, char** argv) { #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) mi_collect(true); + //mi_debug_show_arenas(); #endif #ifndef USE_STD_MALLOC mi_stats_print(NULL); From 3bade4b1bd96ea4d815f2bb40082ff38164354e5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 29 Jan 2021 15:42:52 -0800 Subject: [PATCH 099/352] fix accounting of abandoned pages --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index 5bbf283c..d2902e69 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1104,6 +1104,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s if (mi_page_all_free(page)) { // if this page is all free now, free it without adding to any queues (yet) mi_assert_internal(page->next == NULL && page->prev==NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); segment->abandoned--; slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce! mi_assert_internal(!mi_slice_is_used(slice)); From bd56782f26cc54720484df3d86b36872c169dfba Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 31 Jan 2021 14:02:06 -0800 Subject: [PATCH 100/352] bump version to 2.0.0 --- cmake/mimalloc-config-version.cmake | 4 ++-- include/mimalloc.h | 2 +- test/CMakeLists.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index ed95c19e..58b23676 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ -set(mi_version_major 1) -set(mi_version_minor 7) +set(mi_version_major 2) +set(mi_version_minor 0) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 8d1108a6..48594de7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 170 // major + 2 digits minor +#define MI_MALLOC_VERSION 200 // major + 2 digits minor diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7392d20e..8d137e75 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.7 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 2.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 47050371a1eb935a1571c1e17c3b142402ccc24e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 22 Feb 2021 15:05:47 -0800 Subject: [PATCH 101/352] fix issue #363 and disable assertion for now --- src/init.c | 5 ++++- src/segment.c | 3 +-- test/main-override.cpp | 27 +++++++++++++++++++++------ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/init.c b/src/init.c index aee08f5a..ecb73d6c 100644 --- a/src/init.c +++ b/src/init.c @@ -265,7 +265,10 @@ static bool _mi_heap_done(mi_heap_t* heap) { // free if not the main thread if (heap != &_mi_heap_main) { - mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); + // the following assertion does not always hold for huge segments as those are always treated + // as abandoned: one may allocate it in one thread, but deallocate in another in which case + // the count can be too large or negative. todo: perhaps not count huge segments? see issue #363 + // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main); } #if 0 diff --git a/src/segment.c b/src/segment.c index d2902e69..acb7c58d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1311,7 +1311,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); if (segment == NULL || page==NULL) return NULL; mi_assert_internal(segment->used==1); - mi_assert_internal(mi_page_block_size(page) >= size); + mi_assert_internal(mi_page_block_size(page) >= size); segment->thread_id = 0; // huge segments are immediately abandoned return page; } @@ -1334,7 +1334,6 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block page->is_zero = false; mi_assert(page->used == 0); mi_tld_t* tld = heap->tld; - // mi_segments_track_size((long)segment->segment_size, tld); _mi_segment_page_free(page, true, &tld->segments); } #if (MI_DEBUG!=0) diff --git a/test/main-override.cpp b/test/main-override.cpp index fe5403d1..4acdb34e 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -32,14 +32,17 @@ void heap_late_free(); // issue #204 void padding_shrink(); // issue #209 void various_tests(); void test_mt_shutdown(); +void large_alloc(void); // issue #363 int main() { mi_stats_reset(); // ignore earlier allocations - heap_thread_free_large(); - heap_no_delete(); - heap_late_free(); - padding_shrink(); - various_tests(); + large_alloc(); + + //heap_thread_free_large(); + //heap_no_delete(); + //heap_late_free(); + //padding_shrink(); + //various_tests(); //test_mt_shutdown(); mi_stats_print(NULL); return 0; @@ -176,7 +179,7 @@ void heap_thread_free_large_worker() { void heap_thread_free_large() { for (int i = 0; i < 
100; i++) { - shared_p = mi_malloc_aligned(2*1024*1024 + 1, 8); + shared_p = mi_malloc_aligned(2 * 1024 * 1024 + 1, 8); auto t1 = std::thread(heap_thread_free_large_worker); t1.join(); } @@ -207,3 +210,15 @@ void test_mt_shutdown() std::cout << "done" << std::endl; } + +// issue #363 +using namespace std; + +void large_alloc(void) +{ + char* a = new char[1ull << 25]; + thread th([&] { + delete[] a; + }); + th.join(); +} \ No newline at end of file From dc6bce256d4f3ce87761f9337977dff3d8b1776c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 6 Apr 2021 10:58:12 -0700 Subject: [PATCH 102/352] bump version to v2.0.1 --- include/mimalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 48594de7..2cdded9f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 200 // major + 2 digits minor +#define MI_MALLOC_VERSION 201 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 34ba03951e0756ad1c95c2dd01e967ed8a3f5745 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 6 Apr 2021 11:01:06 -0700 Subject: [PATCH 103/352] merge from dev --- include/mimalloc.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 988537c5..2cdded9f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,11 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -<<<<<<< HEAD #define MI_MALLOC_VERSION 201 // major + 2 digits minor -======= -#define MI_MALLOC_VERSION 171 // major + 2 digits minor ->>>>>>> dev // ------------------------------------------------------ // Compiler specific attributes From 06110589740eb58532d387501e9fdfda146e92e3 Mon Sep 17 00:00:00 2001 From: diorszeng <54629524+diorszeng@users.noreply.github.com> Date: Mon, 31 May 2021 15:02:17 +0800 Subject: [PATCH 104/352] Update mimalloc-types.h fix typo --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 750a2b28..325f487a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -117,7 +117,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE #define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1) #define MI_SEGMENT_SLICE_SIZE (1ULL<< MI_SEGMENT_SLICE_SHIFT) -#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024 +#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 128 #define MI_SMALL_PAGE_SIZE (1ULL< Date: Sun, 6 Jun 2021 20:33:55 -0700 Subject: [PATCH 105/352] merge from dev --- src/segment-cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 08517f4b..f655a92a 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -10,9 +10,9 @@ terms of the MIT license. A copy of the license can be found in the file the commit_mask to optimize the commit/decommit calls. The full memory map of all segments is also implemented here. 
-----------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "../include/mimalloc.h" +#include "../include/mimalloc-internal.h" +#include "../include/mimalloc-atomic.h" #include "bitmap.h" // atomic bitmap From e592360d4d9d91f8d5765ea6b2dd757a36a2fcea Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 7 Jun 2021 17:53:03 -0700 Subject: [PATCH 106/352] revert relative includes --- src/segment-cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index f655a92a..08517f4b 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -10,9 +10,9 @@ terms of the MIT license. A copy of the license can be found in the file the commit_mask to optimize the commit/decommit calls. The full memory map of all segments is also implemented here. -----------------------------------------------------------------------------*/ -#include "../include/mimalloc.h" -#include "../include/mimalloc-internal.h" -#include "../include/mimalloc-atomic.h" +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" #include "bitmap.h" // atomic bitmap From 262022c1d1104874f304889a6ded878cd3d32cc6 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 1 Oct 2021 15:10:11 -0700 Subject: [PATCH 107/352] fix segment map for 32-bit systems (including wasm) --- src/segment-cache.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 08517f4b..00f904ab 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -225,13 +225,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) -static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments +static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? if ((uintptr_t)segment >= MI_MAX_ADDRESS) { *bitidx = 0; - return 0; + return MI_SEGMENT_MAP_WSIZE; } else { uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; @@ -244,7 +244,7 @@ void _mi_segment_map_allocated_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index==0) return; + if (index==MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { @@ -256,7 +256,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index == 0) return; + if (index == MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { @@ -274,7 +274,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { return segment; // yes, allocated by us } - if (index==0) return NULL; + if (index==MI_SEGMENT_MAP_WSIZE) return NULL; // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? 
From e6b58052dae764bf5f1b79ffb65fbbc19596934a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 2 Oct 2021 11:13:00 -0700 Subject: [PATCH 108/352] add start offset to pages to reduce cache/page effects --- include/mimalloc-types.h | 4 ++++ src/alloc-aligned.c | 2 +- src/page.c | 4 ++-- src/segment.c | 14 ++++++++------ 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b3f247b0..2118dfbe 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -145,6 +145,10 @@ terms of the MIT license. A copy of the license can be found in the file // Used as a special value to encode block sizes in 32 bits. #define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) +// blocks up to this size are always allocated aligned +#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) + + // The free lists use encoded next fields // (Only actually encodes when MI_ENCODED_FREELIST is defined.) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 724c0a1b..2280783f 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -41,7 +41,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t } // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) { + if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; diff --git a/src/page.c b/src/page.c index b732c078..82d5dd65 100644 --- a/src/page.c +++ b/src/page.c @@ -593,7 +593,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // ensure we don't touch memory beyond the page to reduce page commit. // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%. extend = (max_extend==0 ? 1 : max_extend); - } + } mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); mi_assert_internal(extend < (1UL<<16)); @@ -624,9 +624,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields mi_page_set_heap(page, heap); + page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start size_t page_size; _mi_segment_page_start(segment, page, &page_size); - page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(mi_page_block_size(page) <= page_size); mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); diff --git a/src/segment.c b/src/segment.c index 76ce2e0b..c6036c4a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -184,20 +184,22 @@ static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } -static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t* page_size) +static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) { ptrdiff_t idx = slice - segment->slices; size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; - if (page_size != NULL) *page_size = psize; - return (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); + // make the start not OS page aligned for smaller blocks to avoid page/cache effects + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0); + if (page_size != NULL) *page_size = psize - start_offset; + return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } // Start of the page available memory; can be used on uninitialized pages uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); - uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page_size); - mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); + uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page->xblock_size, page_size); + mi_assert_internal(page->xblock_size > 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -556,7 +558,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); // commit before changing the slice data - if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) { + if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, 0, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) { return NULL; // commit failed! } From f945dbb390685b7b3c9bfd836ff3358c5c91ed41 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 19 Oct 2021 10:18:44 -0700 Subject: [PATCH 109/352] add space after _Atomic to prevent errors on msvc without /TP (see PR #452) --- src/segment-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 00f904ab..aab387f0 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -225,7 +225,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) -static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments +static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? 
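Note on patch 108 above: it offsets the usable start of small-object pages by MI_MAX_ALIGN_GUARANTEE so that page data does not always begin exactly on an OS page boundary, reducing page and cache-set conflicts between pages; correspondingly, alloc-aligned.c now uses MI_MAX_ALIGN_GUARANTEE rather than MI_MEDIUM_OBJ_SIZE_MAX as the cutoff below which a regular allocation already satisfies the alignment request. A minimal sketch of the start computation follows; the slice size, bounds, and names are illustrative assumptions, not the exact mimalloc code.

#include <stdint.h>
#include <stddef.h>

#define SLICE_SIZE            (64 * 1024)   // assumed slice size (64KiB)
#define MAX_ALIGN_GUARANTEE   (8 * 16)      // assumed: 8 * MI_MAX_ALIGN_SIZE = 128 bytes

// Start and usable size of a page that begins at slice `idx` and spans
// `slice_count` slices. Small block sizes get a small start offset so the
// page data does not always begin on an OS page boundary.
static uint8_t* page_area_start(uint8_t* segment, size_t idx, size_t slice_count,
                                size_t block_size, size_t* page_size) {
  size_t psize = slice_count * SLICE_SIZE;
  size_t start_offset = 0;
  if (block_size >= sizeof(void*) && block_size <= 1024) {
    start_offset = MAX_ALIGN_GUARANTEE;     // offset small-block pages
  }
  if (page_size != NULL) { *page_size = psize - start_offset; }
  return segment + (idx * SLICE_SIZE) + start_offset;
}

Passing a block size of 0, as mi_segment_span_allocate does in the patch when committing a span, keeps the offset at zero, so commit ranges still start on slice boundaries.
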
From 2583ab73dcd4e88447784d7d33092a09f7ee426d Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 19 Oct 2021 13:57:36 -0700 Subject: [PATCH 110/352] remove region.c which belongs in dev only --- src/region.c | 505 --------------------------------------------------- 1 file changed, 505 deletions(-) delete mode 100644 src/region.c diff --git a/src/region.c b/src/region.c deleted file mode 100644 index 2f68b140..00000000 --- a/src/region.c +++ /dev/null @@ -1,505 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper - way to reuse memory. -3. This layer allows for NUMA aware allocation. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? 
------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset - -#include "bitmap.h" - -// Internal raw OS interface -size_t _mi_os_large_page_size(void); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); - -// arena.c -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); - - - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 64KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE - -#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) -#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) - -// Region info -typedef union mi_region_info_u { - uintptr_t value; - struct { - bool valid; // initialized? - bool is_large:1; // allocated in fixed large/huge OS pages - bool is_pinned:1; // pinned memory cannot be decommitted - short numa_node; // the associated NUMA node (where -1 means no associated node) - } x; -} mi_region_info_t; - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - _Atomic(uintptr_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena - uintptr_t padding; // round to 8 fields -} mem_region_t; - -// The region map -static mem_region_t regions[MI_REGION_MAX]; - -// Allocated regions -static _Atomic(uintptr_t) regions_count; // = 0; - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - return _mi_divide_up(size, MI_SEGMENT_SIZE); -} - -/* -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. 
-static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} -*/ - -// Return if a pointer points into a region reserved by us. -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); - mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); -} - -static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { - mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); - size_t idx = region - regions; - mi_assert_internal(®ions[idx] == region); - return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; -} - -static size_t mi_memid_create_from_arena(size_t arena_memid) { - return (arena_memid << 1) | 1; -} - - -static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if ((id&1)==1) { - if (arena_memid != NULL) *arena_memid = (id>>1); - return true; - } - else { - size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; - *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; - *region = ®ions[idx]; - return false; - } -} - - -/* ---------------------------------------------------------------------------- - Allocate a region is allocated from the OS (or an arena) ------------------------------------------------------------------------------*/ - -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // not out of regions yet? - if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - - // try to allocate a fresh region from the OS - bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; - bool is_pinned = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_pinned, &is_zero, &arena_memid, tld); - if (start == NULL) return false; - mi_assert_internal(!(region_large && !allow_large)); - mi_assert_internal(!region_large || region_commit); - - // claim a fresh slot - const uintptr_t idx = mi_atomic_increment_acq_rel(®ions_count); - if (idx >= MI_REGION_MAX) { - mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); - _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB)); - return false; - } - - // allocated, initialize and claim the initial blocks - mem_region_t* r = ®ions[idx]; - r->arena_memid = arena_memid; - mi_atomic_store_release(&r->in_use, (uintptr_t)0); - mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_store_release(&r->commit, (region_commit ? 
MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_store_release(&r->reset, (uintptr_t)0); - *bit_idx = 0; - _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_store_ptr_release(void,&r->start, start); - - // and share it - mi_region_info_t info; - info.value = 0; // initialize the full union to zero - info.x.valid = true; - info.x.is_large = region_large; - info.x.is_pinned = is_pinned; - info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_store_release(&r->info, info.value); // now make it available to others - *region = r; - return true; -} - -/* ---------------------------------------------------------------------------- - Try to claim blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { - // initialized at all? - mi_region_info_t info; - info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); - if (info.value==0) return false; - - // numa correct - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.x.numa_node; - if (rnode >= 0 && rnode != numa_node) return false; - } - - // check allow-large - if (!allow_large && info.x.is_large) return false; - - return true; -} - - -static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // try all regions for a free slot - const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - mem_region_t* r = ®ions[idx]; - // if this region suits our demand (numa node matches, large OS page matches) - if (mi_region_is_suitable(r, numa_node, allow_large)) { - // then try to atomically claim a segment(s) in this region - if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { - tld->region_idx = idx; // remember the last found position - *region = r; - return true; - } - } - } - return false; -} - - -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); - mem_region_t* region; - mi_bitmap_index_t bit_idx; - const int numa_node = (_mi_os_numa_node_count() <= 1 ? 
-1 : _mi_os_numa_node(tld)); - // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) { - // out of regions or memory - return NULL; - } - } - - // ------------------------------------------------ - // found a region and claimed `blocks` at `bit_idx`, initialize them now - mi_assert_internal(region != NULL); - mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); - mi_assert_internal(!(info.x.is_large && !*large)); - mi_assert_internal(start != NULL); - - *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *large = info.x.is_large; - *is_pinned = info.x.is_pinned; - *memid = mi_memid_create(region, bit_idx); - void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - - // commit - if (*commit) { - // ensure commit - bool any_uncommitted; - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); - if (any_uncommitted) { - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - bool commit_zero = false; - if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { - // failed to commit! unclaim and return - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - return NULL; - } - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); - } - mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); - - // unreset reset blocks - if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { - // some blocks are still reset - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; - } - } - mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - - #if (MI_DEBUG>=2) - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - - // and return the allocation - mi_assert_internal(p != NULL); - return p; -} - - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. 
-// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = 0; - *is_zero = false; - *is_pinned = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - if (size == 0) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // allocate from regions if possible - void* p = NULL; - size_t arena_memid; - const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); - if (p == NULL) { - _mi_warning_message("unable to allocate from region: size %zu\n", size); - } - } - if (p == NULL) { - // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, &arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - } - - if (p != NULL) { - mi_assert_internal((uintptr_t)p % alignment == 0); -#if (MI_DEBUG>=2) - if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed -#endif - } - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { - mi_assert_internal(size > 0 && tld != NULL); - if (p==NULL) return; - if (size==0) return; - size = _mi_align_up(size, _mi_os_page_size()); - - size_t arena_memid = 0; - mi_bitmap_index_t bit_idx; - mem_region_t* region; - if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { - // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, full_commit, tld->stats); - } - else { - // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - mi_assert_internal(info.value != 0); - void* blocks_start = mi_region_blocks_start(region, bit_idx); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - - // committed? - if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); - } - - if (any_reset) { - // set the is_reset bits if any pages were reset - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); - } - - // reset the blocks to reduce the working set. 
- if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) - && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead - { - bool any_unreset; - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); - if (any_unreset) { - _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) - _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); - } - } - - // and unclaim - bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_os_tld_t* tld) { - // free every region that has no segments in use. - uintptr_t rcount = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < rcount; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_load_relaxed(®ion->info) != 0) { - // if no segments used, try to claim the whole region - uintptr_t m = mi_atomic_load_relaxed(®ion->in_use); - while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; - if (m == 0) { - // on success, free the whole region - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); - size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); - uintptr_t commit = mi_atomic_load_relaxed(®ions[i].commit); - memset(®ions[i], 0, sizeof(mem_region_t)); - // and release the whole region - mi_atomic_store_release(®ion->info, (uintptr_t)0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); - } - } - } - } -} - - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_reset(p, size, tld->stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_unreset(p, size, is_zero, tld->stats); -} - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_commit(p, size, is_zero, tld->stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_decommit(p, size, tld->stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} From 54b65a556cf1225a5995865d1077340e192112e7 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 27 Oct 2021 10:15:12 -0700 Subject: [PATCH 111/352] fix mi_cfree assertion failure for NULL pointer, issue #478 --- src/segment-cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment-cache.c b/src/segment-cache.c index aab387f0..7bb58ccf 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -267,6 +267,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) { // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
static mi_segment_t* _mi_segment_of(const void* p) { mi_segment_t* segment = _mi_ptr_segment(p); + if (segment == NULL) return NULL; size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge From 1568dbb9e43f3a4f88c596bf5a52b523e8cf6053 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 27 Oct 2021 10:35:16 -0700 Subject: [PATCH 112/352] fix mi_is_valid_pointer bit index search (related to issue #478) --- src/segment-cache.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 7bb58ccf..4adf2123 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -234,9 +234,11 @@ static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitid return MI_SEGMENT_MAP_WSIZE; } else { - uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; *bitidx = segindex % MI_INTPTR_BITS; - return (segindex / MI_INTPTR_BITS); + const size_t mapindex = segindex / MI_INTPTR_BITS; + mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); + return mapindex; } } @@ -290,13 +292,21 @@ static mi_segment_t* _mi_segment_of(const void* p) { loindex = index; lobitidx = mi_bsr(lobits); // lobits != 0 } + else if (index == 0) { + return NULL; + } else { + mi_assert_internal(index > 0); uintptr_t lomask = mask; - loindex = index - 1; - while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; - if (loindex==0) return NULL; + loindex = index; + do { + loindex--; + lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); + } while (lomask != 0 && loindex > 0); + if (lomask == 0) return NULL; lobitidx = mi_bsr(lomask); // lomask != 0 } + mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); // take difference as the addresses could be larger than the MAX_ADDRESS space. 
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; segment = (mi_segment_t*)((uint8_t*)segment - diff); From e4776337797489e19278c2bfc4e75c783ba5bff8 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 27 Oct 2021 10:41:14 -0700 Subject: [PATCH 113/352] fix assertion --- src/segment-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 4adf2123..9e409be4 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -245,7 +245,7 @@ static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitid void _mi_segment_map_allocated_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); if (index==MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; From 7756e1b5fea6501a63e7a1b082e1532c3b1d7240 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 27 Oct 2021 10:45:19 -0700 Subject: [PATCH 114/352] fix assertion --- src/segment-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 9e409be4..d7604502 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -257,7 +257,7 @@ void _mi_segment_map_allocated_at(const mi_segment_t* segment) { void _mi_segment_map_freed_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); if (index == MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; From 49c75a31574812a06a6384826421e9603f30032d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 9 Nov 2021 20:19:31 -0800 Subject: [PATCH 115/352] wip: increase commit mask resolution --- include/mimalloc-internal.h | 184 ++++++++++++++++++++++++++---------- include/mimalloc-types.h | 31 ++++-- src/options.c | 4 +- src/segment-cache.c | 24 +++-- src/segment.c | 135 +++++++++++++++----------- test/test-stress.c | 4 +- 6 files changed, 255 insertions(+), 127 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 8b642c31..7e400217 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -86,7 +86,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, // "segment-cache.c" void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); @@ -691,77 +691,163 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // commit mask // ------------------------------------------------------------------- -#define MI_COMMIT_MASK_BITS (sizeof(mi_commit_mask_t)*8) -static inline mi_commit_mask_t mi_commit_mask_empty(void) { - return 0; +static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { + 
memset(cm, 0, sizeof(*cm)); } -static inline mi_commit_mask_t mi_commit_mask_full(void) { - return ~mi_commit_mask_empty(); +static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { + memset(cm, 0xFF, sizeof(*cm)); } -static inline mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { +static inline void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); if (bitcount == MI_COMMIT_MASK_BITS) { mi_assert_internal(bitidx==0); - return mi_commit_mask_full(); + mi_commit_mask_create_full(cm); } else if (bitcount == 0) { - return mi_commit_mask_empty(); + mi_commit_mask_create_empty(cm); } else { - return (((uintptr_t)1 << bitcount) - 1) << bitidx; + mi_commit_mask_create_empty(cm); + ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; + ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; + while (bitcount > 0) { + mi_assert_internal(i < MI_COMMIT_MASK_N); + ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; + ptrdiff_t count = (bitcount > avail ? avail : bitcount); + size_t mask = (((size_t)1 << count) - 1) << ofs; + cm->mask[i] = mask; + bitcount -= count; + ofs = 0; + i++; + } } } -static inline bool mi_commit_mask_is_empty(mi_commit_mask_t mask) { - return (mask == 0); -} - -static inline bool mi_commit_mask_is_full(mi_commit_mask_t mask) { - return ((~mask) == 0); -} - -static inline bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return ((commit & mask) == mask); -} - -static inline bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return ((commit & mask) != 0); -} - -mi_decl_nodiscard static inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return (commit & mask); -} - -static inline void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = (*commit) & (~mask); -} - -static inline void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = (*commit) | mask; -} - -static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total) { - if (mi_commit_mask_is_full(mask)) { - return total; +static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + if (cm->mask[i] != 0) return false; } - else if (mi_commit_mask_is_empty(mask)) { + return true; +} + +static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + if (cm->mask[i] != 0) return false; + } + return true; +} + +static inline bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; + } + return true; +} + +static inline bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + if ((commit->mask[i] & cm->mask[i]) != 0) return true; + } + return false; +} + +static inline void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + res->mask[i] = (commit->mask[i] & cm->mask[i]); + } +} + +static inline void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for 
(ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + res->mask[i] &= ~(cm->mask[i]); + } +} + +static inline void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + res->mask[i] |= cm->mask[i]; + } +} + +static inline size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t count = 0; + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + size_t mask = cm->mask[i]; + if (~mask == 0) { + count += MI_COMMIT_MASK_FIELD_BITS; + } + else { + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + } + } + // we use total since for huge segments each commit bit may represent a larger size + return (total / MI_COMMIT_MASK_BITS)* count; +} + + +static inline ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx ) { + ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; + ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; + size_t mask = 0; + // find first ones + while (i < MI_COMMIT_MASK_N) { + mask = cm->mask[i]; + mask >>= ofs; + if (mask != 0) { + while ((mask&1) == 0) { + mask >>= 1; + ofs++; + } + break; + } + i++; + ofs = 0; + } + if (i >= MI_COMMIT_MASK_N) { + // not found + *idx = MI_COMMIT_MASK_BITS; return 0; } else { - size_t count = 0; - for (; mask != 0; mask >>= 1) { // todo: use popcount - if ((mask&1)!=0) count++; - } - return (total/MI_COMMIT_MASK_BITS)*count; + // found, count ones + ptrdiff_t count = 0; + *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; + mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); + do { + do { + count++; + mask >>= 1; + } while (mask != 0); + if ((((count + ofs) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { + i++; + if (i >= MI_COMMIT_MASK_N) break; + mask = cm->mask[i]; + if ((mask&1)==0) break; + ofs = 0; + } + } while (mask != 0); + mi_assert_internal(count > 0); + return count; } } +#define mi_commit_mask_foreach(cm,idx,count) \ + idx = 0; \ + while ((count = mi_commit_mask_next_run(cm,&idx)) > 0) { + +#define mi_commit_mask_foreach_end() \ + idx += count; \ + } + -#define mi_commit_mask_foreach(mask,idx,count) \ +#define xmi_commit_mask_foreach(mask,idx,count) \ idx = 0; \ while (mask != 0) { \ /* count ones */ \ @@ -773,7 +859,7 @@ static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t /* if found, do action */ \ if (count > 0) { -#define mi_commit_mask_foreach_end() \ +#define xmi_commit_mask_foreach_end() \ } \ idx += count; \ /* shift out the zero */ \ diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 82d74f7f..50e24fc9 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -83,9 +83,9 @@ terms of the MIT license. A copy of the license can be found in the file // or otherwise one might define an intptr_t type that is larger than a pointer... // ------------------------------------------------------ -#if INTPTR_MAX == 9223372036854775807LL +#if INTPTR_MAX == INT64_MAX # define MI_INTPTR_SHIFT (3) -#elif INTPTR_MAX == 2147483647LL +#elif INTPTR_MAX == INT32_MAX # define MI_INTPTR_SHIFT (2) #else #error platform must be 32 or 64 bits @@ -94,6 +94,18 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_INTPTR_SIZE (1< MI_LARGE_SIZE_MAX segment with just one huge page inside. 
} mi_segment_kind_t; -#define MI_COMMIT_SIZE (MI_SEGMENT_SIZE/MI_INTPTR_BITS) +#define MI_COMMIT_SIZE (128*1024) +#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) +#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS +#define MI_COMMIT_MASK_N (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) -#if (((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE) > 8*MI_INTPTR_SIZE) -#error "not enough commit bits to cover the segment size" +#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_N * MI_COMMIT_MASK_FIELD_BITS)) +#error "the segment size must be exactly divisible by the (commit size * size_t bits)" #endif -typedef mi_page_t mi_slice_t; +typedef struct mi_commit_mask_s { + size_t mask[MI_COMMIT_MASK_N]; +} mi_commit_mask_t; +typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; -typedef uintptr_t mi_commit_mask_t; // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that diff --git a/src/options.c b/src/options.c index 5f2eedec..5ea7a92a 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) + { 0, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 500, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 10, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) { 1000, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) diff --git a/src/segment-cache.c b/src/segment-cache.c index d7604502..3a318cd4 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -76,11 +76,10 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm *memid = slot->memid; *is_pinned = slot->is_pinned; *is_zero = false; - mi_commit_mask_t cmask = slot->commit_mask; // copy + *commit_mask = slot->commit_mask; slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - *commit_mask = cmask; - + // mark the slot as free again mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); @@ -90,27 +89,26 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { - if (mi_commit_mask_is_empty(*cmask)) { + if (mi_commit_mask_is_empty(cmask)) { // nothing } - else if (mi_commit_mask_is_full(*cmask)) { + else if (mi_commit_mask_is_full(cmask)) { _mi_os_decommit(p, total, stats); } else { // todo: one call to decommit the whole at once? mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t part = total/MI_COMMIT_MASK_BITS; - uintptr_t idx; - uintptr_t count; - mi_commit_mask_t mask = *cmask; - mi_commit_mask_foreach(mask, idx, count) { + ptrdiff_t idx; + ptrdiff_t count; + mi_commit_mask_foreach(cmask, idx, count) { void* start = (uint8_t*)p + (idx*part); size_t size = count*part; _mi_os_decommit(start, size, stats); } mi_commit_mask_foreach_end() } - *cmask = mi_commit_mask_empty(); + mi_commit_mask_create_empty(cmask); } #define MI_MAX_PURGE_PER_PUSH (4) @@ -135,7 +133,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) if (expire != 0 && now >= expire) { // safe read // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit // decommit committed parts // TODO: instead of decommit, we could also free to the OS? 
@@ -148,7 +146,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) } } -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return false; @@ -187,7 +185,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->memid = memid; slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = commit_mask; + slot->commit_mask = *commit_mask; if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { long delay = mi_option_get(mi_option_segment_decommit_delay); if (delay == 0) { diff --git a/src/segment.c b/src/segment.c index 6ae3d9af..ee0a2ae2 100644 --- a/src/segment.c +++ b/src/segment.c @@ -127,7 +127,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -256,8 +256,8 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); + if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { + const size_t csize = mi_commit_mask_committed_size(&segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os); @@ -331,74 +331,85 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Span management ----------------------------------------------------------- */ -static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size) { - mi_assert_internal(_mi_ptr_segment(p) == segment); - if (size == 0 || size > MI_SEGMENT_SIZE) return 0; - if (p >= (uint8_t*)segment + mi_segment_size(segment)) return 0; +static ptrdiff_t _mi_aligni_up(ptrdiff_t sz, size_t alignment) { + return (ptrdiff_t)_mi_align_up(sz, alignment); +} - uintptr_t diff = (p - (uint8_t*)segment); - uintptr_t start; - uintptr_t end; +static 
ptrdiff_t _mi_aligni_down(ptrdiff_t sz, size_t alignment) { + return (ptrdiff_t)_mi_align_down(sz, alignment); +} + +static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { + mi_assert_internal(_mi_ptr_segment(p) == segment); + mi_commit_mask_create_empty(cm); + if (size == 0 || size > MI_SEGMENT_SIZE) return; + if (p >= (uint8_t*)segment + mi_segment_size(segment)) return; + + ptrdiff_t diff = (p - (uint8_t*)segment); + ptrdiff_t start; + ptrdiff_t end; if (conservative) { - start = _mi_align_up(diff, MI_COMMIT_SIZE); - end = _mi_align_down(diff + size, MI_COMMIT_SIZE); + start = _mi_aligni_up(diff, MI_COMMIT_SIZE); + end = _mi_aligni_down(diff + size, MI_COMMIT_SIZE); } else { - start = _mi_align_down(diff, MI_COMMIT_SIZE); - end = _mi_align_up(diff + size, MI_COMMIT_SIZE); + start = _mi_aligni_down(diff, MI_COMMIT_SIZE); + end = _mi_aligni_up(diff + size, MI_COMMIT_SIZE); } mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); *start_p = (uint8_t*)segment + start; *full_size = (end > start ? end - start : 0); - if (*full_size == 0) return 0; + if (*full_size == 0) return; - uintptr_t bitidx = start / MI_COMMIT_SIZE; - mi_assert_internal(bitidx < (MI_INTPTR_SIZE*8)); + ptrdiff_t bitidx = start / MI_COMMIT_SIZE; + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - uintptr_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 - if (bitidx + bitcount > MI_INTPTR_SIZE*8) { + ptrdiff_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 + if (bitidx + bitcount > MI_COMMIT_MASK_BITS) { _mi_warning_message("commit mask overflow: %zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); } - mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); - - return mi_commit_mask_create(bitidx, bitcount); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + mi_commit_mask_create(bitidx, bitcount, cm); } static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { // commit liberal, but decommit conservative uint8_t* start; size_t full_size; - mi_commit_mask_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); - if (mi_commit_mask_is_empty(mask) || full_size==0) return true; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; - if (commit && !mi_commit_mask_all_set(segment->commit_mask, mask)) { + if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { bool is_zero = false; - mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); - _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - mi_commit_mask_set(&segment->commit_mask,mask); + mi_commit_mask_set(&segment->commit_mask, &mask); } - else if (!commit && mi_commit_mask_any_set(segment->commit_mask,mask)) { + else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { mi_assert_internal((void*)start != (void*)segment); - mi_commit_mask_t cmask = 
mi_commit_mask_intersect(segment->commit_mask, mask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); } // ok if this fails - mi_commit_mask_clear(&segment->commit_mask, mask); + mi_commit_mask_clear(&segment->commit_mask, &mask); } // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(segment->decommit_mask, mask)) { + if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, mask); - mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); + mi_commit_mask_clear(&segment->decommit_mask, &mask); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); - if (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)) return true; // fully committed + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed return mi_segment_commitx(segment,true,p,size,stats); } @@ -411,27 +422,30 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ // register for future decommit in the decommit mask uint8_t* start; size_t full_size; - mi_commit_mask_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); - if (mi_commit_mask_is_empty(mask) || full_size==0) return; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit - mi_commit_mask_set(&segment->decommit_mask, mi_commit_mask_intersect(mask,segment->commit_mask)); // only decommit what is committed; span_free may try to decommit more + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->decommit_mask, &cmask); segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } } static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (!segment->allow_decommit || mi_commit_mask_is_empty(segment->decommit_mask)) return; + if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->decommit_expire) return; mi_commit_mask_t mask = segment->decommit_mask; segment->decommit_expire = 0; - segment->decommit_mask = mi_commit_mask_empty(); + mi_commit_mask_create_empty(&segment->decommit_mask); - uintptr_t idx; - uintptr_t count; - 
mi_commit_mask_foreach(mask, idx, count) { + ptrdiff_t idx; + ptrdiff_t count; + mi_commit_mask_foreach(&mask, idx, count) { // if found, decommit that sequence if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); @@ -439,8 +453,7 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st mi_segment_commitx(segment, false, p, size, stats); } } - mi_commit_mask_foreach_end() - mi_assert_internal(mi_commit_mask_is_empty(segment->decommit_mask)); + mi_commit_mask_foreach_end() } @@ -649,7 +662,14 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Try to get from our cache first bool is_zero = false; const bool commit_info_still_good = (segment != NULL); - mi_commit_mask_t commit_mask = (segment != NULL ? segment->commit_mask : mi_commit_mask_empty()); + mi_commit_mask_t commit_mask; + if (segment != NULL) { + commit_mask = segment->commit_mask; + } + else { + mi_commit_mask_create_empty(&commit_mask); + } + if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy @@ -659,23 +679,30 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (segment==NULL) { segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate - commit_mask = (commit ? mi_commit_mask_full() : mi_commit_mask_empty()); + if (commit) { + mi_commit_mask_create_full(&commit_mask); + } + else { + mi_commit_mask_create_empty(&commit_mask); + } } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); mi_assert_internal(commit_needed>0); - if (!mi_commit_mask_all_set(commit_mask,mi_commit_mask_create(0, commit_needed))) { + mi_commit_mask_t commit_needed_mask; + mi_commit_mask_create(0, commit_needed, &commit_needed_mask); + if (!mi_commit_mask_all_set(&commit_mask, &commit_needed_mask)) { // at least commit the info slices - mi_assert_internal(commit_needed*MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); + mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= info_slices*MI_SEGMENT_SLICE_SIZE); bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, &is_zero, tld->stats); if (!ok) return NULL; // failed to commit - mi_commit_mask_set(&commit_mask,mi_commit_mask_create(0, commit_needed)); + mi_commit_mask_set(&commit_mask, &commit_needed_mask); } segment->memid = memid; segment->mem_is_pinned = is_pinned; segment->mem_is_large = mem_large; - segment->mem_is_committed = mi_commit_mask_is_full(commit_mask); + segment->mem_is_committed = mi_commit_mask_is_full(&commit_mask); mi_segments_track_size((long)(segment_size), tld); _mi_segment_map_allocated_at(segment); } @@ -692,7 +719,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); segment->decommit_expire = 0; - segment->decommit_mask = mi_commit_mask_empty(); + mi_commit_mask_create_empty( &segment->decommit_mask ); } // initialize segment info diff --git a/test/test-stress.c b/test/test-stress.c index a9568dd9..100c6d66 100644 --- a/test/test-stress.c +++ 
b/test/test-stress.c @@ -39,12 +39,12 @@ static size_t use_one_size = 0; // use single object size of `N * s // #define USE_STD_MALLOC #ifdef USE_STD_MALLOC -#define custom_calloc(n,s) calloc(n,s) +#define custom_calloc(n,s) malloc(n*s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) #else #include -#define custom_calloc(n,s) mi_calloc(n,s) +#define custom_calloc(n,s) mi_malloc(n*s) #define custom_realloc(p,s) mi_realloc(p,s) #define custom_free(p) mi_free(p) #endif From 8cc7d0c0195642f94cd9fc347e621d3652beeb9b Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 10 Nov 2021 16:29:53 -0800 Subject: [PATCH 116/352] increase segment size to 64MiB --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 82d74f7f..8d1e5149 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -106,7 +106,7 @@ terms of the MIT license. A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB -#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 8MiB +#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB @@ -127,7 +127,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 4MiB on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) From 49d64dbc9571516dc8298f6bebc34ebf9d89afc8 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 10 Nov 2021 16:30:21 -0800 Subject: [PATCH 117/352] save decommit_mask for segments in the segment cache --- include/mimalloc-internal.h | 4 +-- src/options.c | 4 +-- src/segment-cache.c | 18 ++++++++---- src/segment.c | 22 +++++++++++--- test/main-override.cpp | 57 +++++++++++++++++++++++++++++-------- 5 files changed, 79 insertions(+), 26 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 8b642c31..7ffa0023 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -85,8 +85,8 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinn void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, mi_commit_mask_t decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); 
void _mi_segment_map_freed_at(const mi_segment_t* segment); diff --git a/src/options.c b/src/options.c index 5f2eedec..dbd4158c 100644 --- a/src/options.c +++ b/src/options.c @@ -89,8 +89,8 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 500, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) - { 1000, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments + { 100, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment-cache.c b/src/segment-cache.c index d7604502..6513204d 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -22,13 +22,14 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit #define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) -#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) +#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes typedef struct mi_cache_slot_s { void* p; size_t memid; bool is_pinned; mi_commit_mask_t commit_mask; + mi_commit_mask_t decommit_mask; _Atomic(mi_msecs_t) expire; } mi_cache_slot_t; @@ -39,8 +40,10 @@ static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIEL static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + if (_mi_preloading()) return NULL; + #ifdef MI_CACHE_DISABLE return NULL; #else @@ -76,11 +79,11 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm *memid = slot->memid; *is_pinned = slot->is_pinned; *is_zero = false; - mi_commit_mask_t cmask = slot->commit_mask; // copy + *commit_mask = slot->commit_mask; + *decommit_mask = slot->decommit_mask; slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - *commit_mask = cmask; - + // mark the slot as free again mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); @@ -140,6 +143,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) // decommit committed parts // TODO: instead of decommit, we could also free to the OS? 
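The purge pass in mi_segment_cache_purge is driven by a per-slot expiration time; the following standalone sketch shows that claim-on-expiry pattern in isolation (slot_t and slot_decommit_due are illustrative names, not the mimalloc API):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t msecs_t;

// Illustrative cache slot: remembers a freed segment and when its delayed
// decommit becomes due (0 means nothing is pending).
typedef struct slot_s {
  void*   p;
  msecs_t expire;
} slot_t;

// Returns true when the slot's delayed decommit is due at time `now`; the
// caller would then decommit the still-committed parts and clear the pending
// decommit state, as the purge loop does.
static bool slot_decommit_due(slot_t* slot, msecs_t now) {
  if (slot->p == NULL || slot->expire == 0) return false;  // empty slot or nothing pending
  if (now < slot->expire) return false;                    // not yet expired
  slot->expire = 0;                                        // claim the pending decommit
  return true;
}

int main(void) {
  slot_t s = { (void*)&s, 100 };            // pretend a segment was cached, due at t=100
  int early = slot_decommit_due(&s, 50);    // 0: not yet due
  int later = slot_decommit_due(&s, 150);   // 1: due, pending state claimed
  printf("%d %d\n", early, later);
  return 0;
}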
mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); + slot->decommit_mask = mi_commit_mask_empty(); } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } @@ -148,7 +152,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) } } -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, mi_commit_mask_t decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return false; @@ -188,11 +192,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->commit_mask = commit_mask; + slot->decommit_mask = decommit_mask; if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { long delay = mi_option_get(mi_option_segment_decommit_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); + slot->decommit_mask = mi_commit_mask_empty(); } else { mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); diff --git a/src/segment.c b/src/segment.c index 6ae3d9af..1533d281 100644 --- a/src/segment.c +++ b/src/segment.c @@ -256,7 +256,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { + if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free @@ -650,12 +650,13 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ bool is_zero = false; const bool commit_info_still_good = (segment != NULL); mi_commit_mask_t commit_mask = (segment != NULL ? segment->commit_mask : mi_commit_mask_empty()); + mi_commit_mask_t decommit_mask = (segment != NULL ? 
segment->decommit_mask : mi_commit_mask_empty()); if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; size_t memid = 0; - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment==NULL) { segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate @@ -691,9 +692,22 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!commit_info_still_good) { segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - segment->decommit_expire = 0; - segment->decommit_mask = mi_commit_mask_empty(); + if (segment->allow_decommit) { + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + segment->decommit_mask = decommit_mask; + mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); + #if MI_DEBUG>2 + const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_assert_internal(!mi_commit_mask_any_set(segment->decommit_mask, mi_commit_mask_create(0, commit_needed))); + #endif + } + else { + mi_assert_internal(mi_commit_mask_is_empty(decommit_mask)); + segment->decommit_expire = 0; + segment->decommit_mask = mi_commit_mask_empty(); + } } + // initialize segment info segment->segment_slices = segment_slices; diff --git a/test/main-override.cpp b/test/main-override.cpp index 32011c67..37734d37 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -35,22 +35,24 @@ static void test_mt_shutdown(); static void large_alloc(void); // issue #363 static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 -static void strdup_test(); // issue #445 +static void strdup_test(); // issue #445 +static void bench_alloc_large(void); // issue #xxx int main() { mi_stats_reset(); // ignore earlier allocations - - heap_thread_free_large(); - heap_no_delete(); - heap_late_free(); - padding_shrink(); - various_tests(); - large_alloc(); - tsan_numa_test(); - strdup_test(); + + heap_thread_free_large(); + heap_no_delete(); + heap_late_free(); + padding_shrink(); + various_tests(); + large_alloc(); + tsan_numa_test(); + strdup_test(); //test_mt_shutdown(); //fail_aslr(); + //bench_alloc_large(); mi_stats_print(NULL); return 0; } @@ -246,11 +248,42 @@ static void fail_aslr() { // issues #414 static void dummy_worker() { void* p = mi_malloc(0); - mi_free(p); + mi_free(p); } static void tsan_numa_test() { auto t1 = std::thread(dummy_worker); dummy_worker(); t1.join(); -} \ No newline at end of file +} + +// issue #? 
+#include +#include +#include + +static void bench_alloc_large(void) { + static constexpr int kNumBuffers = 20; + static constexpr size_t kMinBufferSize = 5 * 1024 * 1024; + static constexpr size_t kMaxBufferSize = 25 * 1024 * 1024; + std::unique_ptr buffers[kNumBuffers]; + + std::random_device rd; + std::mt19937 gen(42); //rd()); + std::uniform_int_distribution<> size_distribution(kMinBufferSize, kMaxBufferSize); + std::uniform_int_distribution<> buf_number_distribution(0, kNumBuffers - 1); + + static constexpr int kNumIterations = 2000; + const auto start = std::chrono::steady_clock::now(); + for (int i = 0; i < kNumIterations; ++i) { + int buffer_idx = buf_number_distribution(gen); + size_t new_size = size_distribution(gen); + buffers[buffer_idx] = std::make_unique(new_size); + } + const auto end = std::chrono::steady_clock::now(); + const auto num_ms = std::chrono::duration_cast(end - start).count(); + const auto us_per_allocation = std::chrono::duration_cast(end - start).count() / kNumIterations; + std::cout << kNumIterations << " allocations Done in " << num_ms << "ms." << std::endl; + std::cout << "Avg " << us_per_allocation << " us per allocation" << std::endl; +} + From b1aff903f5622a549572bc833473ee2295b17844 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 11 Nov 2021 17:45:41 -0800 Subject: [PATCH 118/352] fix decommit bug --- include/mimalloc-internal.h | 119 +++------------------------------- include/mimalloc-types.h | 10 +-- src/options.c | 4 +- src/segment.c | 124 ++++++++++++++++++++++++++++++++++-- test/main-override.cpp | 2 +- 5 files changed, 133 insertions(+), 126 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 51a6c916..4e05c724 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -693,37 +693,14 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { - memset(cm, 0, sizeof(*cm)); + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + cm->mask[i] = 0; + } } static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { - memset(cm, 0xFF, sizeof(*cm)); -} - -static inline void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { - mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); - if (bitcount == MI_COMMIT_MASK_BITS) { - mi_assert_internal(bitidx==0); - mi_commit_mask_create_full(cm); - } - else if (bitcount == 0) { - mi_commit_mask_create_empty(cm); - } - else { - mi_commit_mask_create_empty(cm); - ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; - ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; - while (bitcount > 0) { - mi_assert_internal(i < MI_COMMIT_MASK_N); - ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; - ptrdiff_t count = (bitcount > avail ? 
avail : bitcount); - size_t mask = (((size_t)1 << count) - 1) << ofs; - cm->mask[i] = mask; - bitcount -= count; - ofs = 0; - i++; - } + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + cm->mask[i] = ~((size_t)0); } } @@ -773,70 +750,9 @@ static inline void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mas } } -static inline size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t count = 0; - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - size_t mask = cm->mask[i]; - if (~mask == 0) { - count += MI_COMMIT_MASK_FIELD_BITS; - } - else { - for (; mask != 0; mask >>= 1) { // todo: use popcount - if ((mask&1)!=0) count++; - } - } - } - // we use total since for huge segments each commit bit may represent a larger size - return (total / MI_COMMIT_MASK_BITS)* count; -} - - -static inline ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx ) { - ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; - ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; - size_t mask = 0; - // find first ones - while (i < MI_COMMIT_MASK_N) { - mask = cm->mask[i]; - mask >>= ofs; - if (mask != 0) { - while ((mask&1) == 0) { - mask >>= 1; - ofs++; - } - break; - } - i++; - ofs = 0; - } - if (i >= MI_COMMIT_MASK_N) { - // not found - *idx = MI_COMMIT_MASK_BITS; - return 0; - } - else { - // found, count ones - ptrdiff_t count = 0; - *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; - mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); - do { - do { - count++; - mask >>= 1; - } while (mask != 0); - if ((((count + ofs) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { - i++; - if (i >= MI_COMMIT_MASK_N) break; - mask = cm->mask[i]; - if ((mask&1)==0) break; - ofs = 0; - } - } while (mask != 0); - mi_assert_internal(count > 0); - return count; - } -} +void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm); +size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx); #define mi_commit_mask_foreach(cm,idx,count) \ idx = 0; \ @@ -847,25 +763,6 @@ static inline ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrd } -#define xmi_commit_mask_foreach(mask,idx,count) \ - idx = 0; \ - while (mask != 0) { \ - /* count ones */ \ - count = 0; \ - while ((mask&1)==1) { \ - mask >>= 1; \ - count++; \ - } \ - /* if found, do action */ \ - if (count > 0) { - -#define xmi_commit_mask_foreach_end() \ - } \ - idx += count; \ - /* shift out the zero */ \ - mask >>= 1; \ - idx++; \ - } // ------------------------------------------------------------------- // Fast "random" shuffle diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 1742fced..c60457c8 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -125,14 +125,14 @@ terms of the MIT license. A copy of the license can be found in the file // Derived constants -#define MI_SEGMENT_SIZE (1ULL< MI_LARGE_SIZE_MAX segment with just one huge page inside. 
} mi_segment_kind_t; -#define MI_COMMIT_SIZE (128*1024) +#define MI_COMMIT_SIZE (4*64*1024) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_N (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) diff --git a/src/options.c b/src/options.c index 5ea7a92a..925ecbf9 100644 --- a/src/options.c +++ b/src/options.c @@ -89,8 +89,8 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 10, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) - { 1000, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments + { 100, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index 582953de..93548f24 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,6 +15,111 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + if (bitcount == MI_COMMIT_MASK_BITS) { + mi_assert_internal(bitidx==0); + mi_commit_mask_create_full(cm); + } + else if (bitcount == 0) { + mi_commit_mask_create_empty(cm); + } + else { + mi_commit_mask_create_empty(cm); + ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; + ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; + while (bitcount > 0) { + mi_assert_internal(i < MI_COMMIT_MASK_N); + ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; + ptrdiff_t count = (bitcount > avail ? avail : bitcount); + size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? 
~((size_t)0) : (((size_t)1 << count) - 1) << ofs); + cm->mask[i] = mask; + bitcount -= count; + ofs = 0; + i++; + } + } +} + + +size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t count = 0; + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + size_t mask = cm->mask[i]; + if (~mask == 0) { + count += MI_COMMIT_MASK_FIELD_BITS; + } + else { + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + } + } + // we use total since for huge segments each commit bit may represent a larger size + return ((total / MI_COMMIT_MASK_BITS) * count); +} + + +ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { + ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; + ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; + size_t mask = 0; + // find first ones + while (i < MI_COMMIT_MASK_N) { + mask = cm->mask[i]; + mask >>= ofs; + if (mask != 0) { + while ((mask&1) == 0) { + mask >>= 1; + ofs++; + } + break; + } + i++; + ofs = 0; + } + if (i >= MI_COMMIT_MASK_N) { + // not found + *idx = MI_COMMIT_MASK_BITS; + return 0; + } + else { + // found, count ones + ptrdiff_t count = 0; + *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; + do { + mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); + do { + count++; + mask >>= 1; + } while ((mask&1) == 1); + if ((((*idx + count) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { + i++; + if (i >= MI_COMMIT_MASK_N) break; + mask = cm->mask[i]; + ofs = 0; + } + } while ((mask&1) == 1); + mi_assert_internal(count > 0); + return count; + } +} + +#define mi_commit_mask_foreach(cm,idx,count) \ + idx = 0; \ + while ((count = mi_commit_mask_next_run(cm,&idx)) > 0) { + +#define mi_commit_mask_foreach_end() \ + idx += count; \ + } + + /* -------------------------------------------------------------------------------- Segment allocation @@ -374,9 +479,11 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin } static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + // commit liberal, but decommit conservative - uint8_t* start; - size_t full_size; + uint8_t* start = NULL; + size_t full_size = 0; mi_commit_mask_t mask; mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; @@ -391,10 +498,14 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { mi_assert_internal((void*)start != (void*)segment); + //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); + mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); } // ok if this fails + if (segment->allow_decommit) { + _mi_os_decommit(start, full_size, stats); // ok if this fails + } mi_commit_mask_clear(&segment->commit_mask, &mask); } // increase expiration of reusing part of the delayed decommit @@ -403,7 +514,6 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } // always undo delayed decommits 
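mi_commit_mask_create, moved into segment.c by this patch, marks a run of commit chunks whose bits may straddle a field boundary in the mask array. A standalone re-implementation over two 64-bit fields (illustrative only, not the patched code itself):

#include <stdint.h>
#include <stdio.h>

#define FIELD_BITS  64
#define FIELD_COUNT 2

// Set `bitcount` bits starting at `bitidx` across an array of 64-bit fields,
// the same run-marking idea as mi_commit_mask_create.
static void mask_create(size_t bitidx, size_t bitcount, uint64_t mask[FIELD_COUNT]) {
  for (size_t i = 0; i < FIELD_COUNT; i++) mask[i] = 0;
  size_t i   = bitidx / FIELD_BITS;
  size_t ofs = bitidx % FIELD_BITS;
  while (bitcount > 0) {
    size_t avail = FIELD_BITS - ofs;
    size_t count = (bitcount > avail ? avail : bitcount);
    mask[i] |= (count >= FIELD_BITS ? ~UINT64_C(0) : ((UINT64_C(1) << count) - 1) << ofs);
    bitcount -= count;
    ofs = 0;
    i++;
  }
}

int main(void) {
  uint64_t m[FIELD_COUNT];
  mask_create(62, 4, m);   // a run of 4 chunks starting at bit 62 crosses into the next field
  printf("%016llx %016llx\n",
         (unsigned long long)m[0], (unsigned long long)m[1]);  // c000000000000000 0000000000000003
  return 0;
}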
mi_commit_mask_clear(&segment->decommit_mask, &mask); - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); return true; } @@ -420,8 +530,8 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } else { // register for future decommit in the decommit mask - uint8_t* start; - size_t full_size; + uint8_t* start = NULL; + size_t full_size = 0; mi_commit_mask_t mask; mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask); if (mi_commit_mask_is_empty(&mask) || full_size==0) return; @@ -719,7 +829,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!commit_info_still_good) { segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); + segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); if (segment->allow_decommit) { segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); segment->decommit_mask = decommit_mask; diff --git a/test/main-override.cpp b/test/main-override.cpp index 37734d37..8834f2c7 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -52,7 +52,7 @@ int main() { //test_mt_shutdown(); //fail_aslr(); - //bench_alloc_large(); + bench_alloc_large(); mi_stats_print(NULL); return 0; } From c6b82a4b37b8bdf0ccc754371492c632c3376311 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 12 Nov 2021 17:31:21 -0800 Subject: [PATCH 119/352] wip: change decommit expiration --- CMakeLists.txt | 2 +- src/options.c | 2 +- src/segment.c | 16 +++++++++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f880f74..bc4b3a51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,7 +176,7 @@ endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) + list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden $<$:-O3>) if(NOT MI_USE_CXX) list(APPEND mi_cflags -Wstrict-prototypes) endif() diff --git a/src/options.c b/src/options.c index dbd4158c..21edd97c 100644 --- a/src/options.c +++ b/src/options.c @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 100, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 50, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) diff --git a/src/segment.c b/src/segment.c index 1533d281..1ab7328e 100644 --- a/src/segment.c +++ b/src/segment.c @@ -416,7 +416,21 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ // update delayed commit mi_commit_mask_set(&segment->decommit_mask, mi_commit_mask_intersect(mask,segment->commit_mask)); // only decommit what is committed; span_free may try to decommit more - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + mi_msecs_t now = _mi_clock_now(); + if (segment->decommit_expire == 0) { + // no previous decommits, initialize now + mi_assert_internal(mi_commit_mask_is_empty(segment->decommit_mask)); + segment->decommit_expire = now + mi_option_get(mi_option_reset_delay); + } + else if (segment->decommit_expire <= now) { + // previous decommit mask already expired + // mi_segment_delayed_decommit(segment, true, stats); + segment->decommit_expire = now + 1; + } + else { + // previous decommit mask is not yet expired + // segment->decommit_expire++; + } } } From 9322123a9756ee98796f193dd30f582119e17b4c Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 12 Nov 2021 19:32:57 -0800 Subject: [PATCH 120/352] start eager commit delay at N>2 --- src/options.c | 2 +- src/segment.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 21edd97c..e01e4665 100644 --- a/src/options.c +++ b/src/options.c @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 50, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 50, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) diff --git a/src/segment.c b/src/segment.c index a37252f5..794a0541 100644 --- a/src/segment.c +++ b/src/segment.c @@ -656,7 +656,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) - const bool eager_delay = (_mi_current_thread_count() > 4 && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager_delay = (_mi_current_thread_count() > 2 && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); From f58b4d923ad5f565822a420623f4d90354458d8d Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 12 Nov 2021 19:58:49 -0800 Subject: [PATCH 121/352] comment --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 794a0541..01fbe022 100644 --- a/src/segment.c +++ b/src/segment.c @@ -425,7 +425,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + 1; + segment->decommit_expire = now + 1; // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired From 5dc4ec48fe1a6d7eb4861ab811fd01e3646317ae Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 12 Nov 2021 21:15:11 -0800 Subject: [PATCH 122/352] lower default reset delay --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index e01e4665..859b3871 100644 --- a/src/options.c +++ b/src/options.c @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 50, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 25, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) From 9afc253726fbe28015b3c37841e41c9202d382ef Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 14:03:16 -0800 Subject: [PATCH 123/352] add comments, renaming --- include/mimalloc-internal.h | 50 +++++---------------------- include/mimalloc-types.h | 21 ++++++++---- src/segment.c | 68 +++++++++++++++++++++++++------------ 3 files changed, 70 insertions(+), 69 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index caf5a784..6b416b17 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -19,6 +19,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_CACHE_LINE 64 #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) @@ -696,72 +697,39 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // commit mask // ------------------------------------------------------------------- - static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = 0; } } static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = ~((size_t)0); } } static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } -static inline bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; - } - return true; -} - -static inline bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - if ((commit->mask[i] & cm->mask[i]) != 0) return true; - } - return false; -} - -static inline void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - res->mask[i] = (commit->mask[i] & cm->mask[i]); - } -} - -static inline void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - res->mask[i] &= ~(cm->mask[i]); - } -} - -static inline void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - res->mask[i] |= cm->mask[i]; - } -} - -void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm); -size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); -ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx); +// defined in `segment.c`: +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx); #define mi_commit_mask_foreach(cm,idx,count) \ idx = 0; \ - while ((count = mi_commit_mask_next_run(cm,&idx)) > 0) { + while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { #define mi_commit_mask_foreach_end() \ idx += count; \ diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index c60457c8..5bf779fa 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -287,17 +287,26 @@ typedef enum mi_segment_kind_e { MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just 
one huge page inside. } mi_segment_kind_t; -#define MI_COMMIT_SIZE (4*64*1024) -#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) -#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS -#define MI_COMMIT_MASK_N (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) +// ------------------------------------------------------ +// A segment holds a commit mask where a bit is set if +// the corresponding MI_COMMIT_SIZE area is committed. +// The MI_COMMIT_SIZE must be a multiple of the slice +// size. We define it as equal so we can decommit on a +// slice level which helps with (real) memory fragmentation +// over time. +// ------------------------------------------------------ -#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_N * MI_COMMIT_MASK_FIELD_BITS)) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) +#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS +#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) + +#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS)) #error "the segment size must be exactly divisible by the (commit size * size_t bits)" #endif typedef struct mi_commit_mask_s { - size_t mask[MI_COMMIT_MASK_N]; + size_t mask[MI_COMMIT_MASK_FIELD_COUNT]; } mi_commit_mask_t; typedef mi_page_t mi_slice_t; diff --git a/src/segment.c b/src/segment.c index da1664b0..d953438a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,11 +15,44 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); + // ------------------------------------------------------------------- -// commit mask +// commit mask // ------------------------------------------------------------------- -void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { +static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; + } + return true; +} + +static bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if ((commit->mask[i] & cm->mask[i]) != 0) return true; + } + return false; +} + +static void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] = (commit->mask[i] & cm->mask[i]); + } +} + +static void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] &= ~(cm->mask[i]); + } +} + +static void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] |= cm->mask[i]; + } +} + +static void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); if (bitcount == MI_COMMIT_MASK_BITS) { @@ -34,7 +67,7 @@ void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_ ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; while (bitcount > 0) { - mi_assert_internal(i < 
MI_COMMIT_MASK_N); + mi_assert_internal(i < MI_COMMIT_MASK_FIELD_COUNT); ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; ptrdiff_t count = (bitcount > avail ? avail : bitcount); size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? ~((size_t)0) : (((size_t)1 << count) - 1) << ofs); @@ -46,11 +79,10 @@ void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_ } } - -size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t count = 0; - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { size_t mask = cm->mask[i]; if (~mask == 0) { count += MI_COMMIT_MASK_FIELD_BITS; @@ -66,12 +98,12 @@ size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { } -ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { +ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; size_t mask = 0; // find first ones - while (i < MI_COMMIT_MASK_N) { + while (i < MI_COMMIT_MASK_FIELD_COUNT) { mask = cm->mask[i]; mask >>= ofs; if (mask != 0) { @@ -84,7 +116,7 @@ ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { i++; ofs = 0; } - if (i >= MI_COMMIT_MASK_N) { + if (i >= MI_COMMIT_MASK_FIELD_COUNT) { // not found *idx = MI_COMMIT_MASK_BITS; return 0; @@ -101,7 +133,7 @@ ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { } while ((mask&1) == 1); if ((((*idx + count) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { i++; - if (i >= MI_COMMIT_MASK_N) break; + if (i >= MI_COMMIT_MASK_FIELD_COUNT) break; mask = cm->mask[i]; ofs = 0; } @@ -111,14 +143,6 @@ ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { } } -#define mi_commit_mask_foreach(cm,idx,count) \ - idx = 0; \ - while ((count = mi_commit_mask_next_run(cm,&idx)) > 0) { - -#define mi_commit_mask_foreach_end() \ - idx += count; \ - } - /* -------------------------------------------------------------------------------- Segment allocation @@ -289,7 +313,7 @@ static size_t mi_segment_info_size(mi_segment_t* segment) { static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) { ptrdiff_t idx = slice - segment->slices; - size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 
MI_MAX_ALIGN_GUARANTEE : 0); if (page_size != NULL) *page_size = psize - start_offset; @@ -362,7 +386,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = mi_commit_mask_committed_size(&segment->commit_mask, size); + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os); @@ -502,7 +526,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap + _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; mi_commit_mask_set(&segment->commit_mask, &mask); } @@ -512,7 +536,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); // ok if this fails } From 88e6b52b884ccdda4a989434fc4d57226257c9fb Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 15:25:51 -0800 Subject: [PATCH 124/352] fix types to size_t --- include/mimalloc-internal.h | 12 ++++++------ src/segment-cache.c | 6 +++--- src/segment.c | 34 +++++++++++++++++----------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cd6a4b6f..dd5c384a 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -698,34 +698,34 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // ------------------------------------------------------------------- static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = 0; } } static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = ~((size_t)0); } } static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } static inline bool 
mi_commit_mask_is_full(const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } // defined in `segment.c`: -size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); -ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx); +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); #define mi_commit_mask_foreach(cm,idx,count) \ idx = 0; \ diff --git a/src/segment-cache.c b/src/segment-cache.c index d773658d..cabdec8f 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -100,9 +100,9 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo else { // todo: one call to decommit the whole at once? mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - ptrdiff_t idx; - ptrdiff_t count; + size_t part = total/MI_COMMIT_MASK_BITS; + size_t idx; + size_t count; mi_commit_mask_foreach(cmask, idx, count) { void* start = (uint8_t*)p + (idx*part); size_t size = count*part; diff --git a/src/segment.c b/src/segment.c index c164eb30..71fd9e9c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -21,38 +21,38 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st // ------------------------------------------------------------------- static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; } return true; } static bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if ((commit->mask[i] & cm->mask[i]) != 0) return true; } return false; } static void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] = (commit->mask[i] & cm->mask[i]); } } static void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] &= ~(cm->mask[i]); } } static void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] |= cm->mask[i]; } } -static void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { +static void mi_commit_mask_create(size_t bitidx, size_t bitcount, mi_commit_mask_t* cm) { mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); if (bitcount == MI_COMMIT_MASK_BITS) { @@ -64,12 +64,12 @@ static void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commi } else { mi_commit_mask_create_empty(cm); - ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; - ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; + size_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; 
+ size_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; while (bitcount > 0) { mi_assert_internal(i < MI_COMMIT_MASK_FIELD_COUNT); - ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; - ptrdiff_t count = (bitcount > avail ? avail : bitcount); + size_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; + size_t count = (bitcount > avail ? avail : bitcount); size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? ~((size_t)0) : (((size_t)1 << count) - 1) << ofs); cm->mask[i] = mask; bitcount -= count; @@ -82,7 +82,7 @@ static void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commi size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t count = 0; - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { size_t mask = cm->mask[i]; if (~mask == 0) { count += MI_COMMIT_MASK_FIELD_BITS; @@ -98,9 +98,9 @@ size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) } -ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { - ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; - ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) { + size_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; + size_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; size_t mask = 0; // find first ones while (i < MI_COMMIT_MASK_FIELD_COUNT) { @@ -123,7 +123,7 @@ ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { } else { // found, count ones - ptrdiff_t count = 0; + size_t count = 0; *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; do { mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); @@ -602,8 +602,8 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st segment->decommit_expire = 0; mi_commit_mask_create_empty(&segment->decommit_mask); - ptrdiff_t idx; - ptrdiff_t count; + size_t idx; + size_t count; mi_commit_mask_foreach(&mask, idx, count) { // if found, decommit that sequence if (count > 0) { From f1ce9228a173b9f6307785e0a9ad0af89a705f42 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 15:29:57 -0800 Subject: [PATCH 125/352] use size_t for bitmask --- include/mimalloc-types.h | 6 +++--- src/segment.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 24cffe6d..4d703f13 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -307,9 +307,9 @@ typedef enum mi_segment_kind_e { MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. } mi_segment_kind_t; -#define MI_COMMIT_SIZE (MI_SEGMENT_SIZE/MI_INTPTR_BITS) +#define MI_COMMIT_SIZE (MI_SEGMENT_SIZE/MI_SIZE_BITS) -#if (((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE) > 8*MI_INTPTR_SIZE) +#if (((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE) > MI_SIZE_BITS) #error "not enough commit bits to cover the segment size" #endif @@ -317,7 +317,7 @@ typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; -typedef uintptr_t mi_commit_mask_t; +typedef size_t mi_commit_mask_t; // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. 
Inside segments we allocated fixed size _pages_ that diff --git a/src/segment.c b/src/segment.c index af72cdf5..5516a626 100644 --- a/src/segment.c +++ b/src/segment.c @@ -336,9 +336,9 @@ static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conse if (size == 0 || size > MI_SEGMENT_SIZE) return 0; if (p >= (uint8_t*)segment + mi_segment_size(segment)) return 0; - uintptr_t diff = (p - (uint8_t*)segment); - uintptr_t start; - uintptr_t end; + size_t diff = (p - (uint8_t*)segment); + size_t start; + size_t end; if (conservative) { start = _mi_align_up(diff, MI_COMMIT_SIZE); end = _mi_align_down(diff + size, MI_COMMIT_SIZE); @@ -353,14 +353,14 @@ static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conse *full_size = (end > start ? end - start : 0); if (*full_size == 0) return 0; - uintptr_t bitidx = start / MI_COMMIT_SIZE; - mi_assert_internal(bitidx < (MI_INTPTR_SIZE*8)); + size_t bitidx = start / MI_COMMIT_SIZE; + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - uintptr_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 + size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 if (bitidx + bitcount > MI_INTPTR_SIZE*8) { _mi_warning_message("commit mask overflow: %zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); } - mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); return mi_commit_mask_create(bitidx, bitcount); } @@ -443,8 +443,8 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st segment->decommit_expire = 0; segment->decommit_mask = mi_commit_mask_empty(); - uintptr_t idx; - uintptr_t count; + size_t idx; + size_t count; mi_commit_mask_foreach(mask, idx, count) { // if found, decommit that sequence if (count > 0) { From b72065f04bb5928f994e6f2f21cd79328558a08f Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 15:50:26 -0800 Subject: [PATCH 126/352] move commit mask functions to segment.c --- include/mimalloc-internal.h | 53 ++--------------------------- src/segment.c | 66 ++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 55 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 993ba754..20a26dac 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -702,21 +702,6 @@ static inline mi_commit_mask_t mi_commit_mask_full(void) { return ~mi_commit_mask_empty(); } -static inline mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { - mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); - if (bitcount == MI_COMMIT_MASK_BITS) { - mi_assert_internal(bitidx==0); - return mi_commit_mask_full(); - } - else if (bitcount == 0) { - return mi_commit_mask_empty(); - } - else { - return (((uintptr_t)1 << bitcount) - 1) << bitidx; - } -} - static inline bool mi_commit_mask_is_empty(mi_commit_mask_t mask) { return (mask == 0); } @@ -725,42 +710,7 @@ static inline bool mi_commit_mask_is_full(mi_commit_mask_t mask) { return ((~mask) == 0); } -static inline bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return ((commit & mask) == mask); -} - -static inline bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return ((commit & mask) != 0); -} - -mi_decl_nodiscard static inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return (commit & mask); -} - 
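With the one-word commit mask that [PATCH 125] switches to (one bit per MI_SEGMENT_SIZE/MI_SIZE_BITS chunk), the committed size is just the bit count scaled by the per-bit chunk size. A standalone sketch of that calculation (not mimalloc code; sizes assume a 64 MiB segment and 64-bit words):

#include <stdint.h>
#include <stdio.h>

// committed bytes = chunk_size * popcount(mask), where chunk_size = total / 64
static uint64_t committed_size(uint64_t mask, uint64_t total) {
  uint64_t count = 0;
  for (; mask != 0; mask >>= 1) {   // a builtin popcount would do the same
    if (mask & 1) count++;
  }
  return (total / 64) * count;
}

int main(void) {
  const uint64_t segment_size = 64ULL * 1024 * 1024;  // 64 MiB segment -> 1 MiB per bit
  const uint64_t mask = 0xFF;                         // first 8 chunks committed
  printf("%llu MiB committed\n",
         (unsigned long long)(committed_size(mask, segment_size) / (1024 * 1024)));  // 8 MiB
  return 0;
}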
-static inline void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = (*commit) & (~mask); -} - -static inline void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = (*commit) | mask; -} - -static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total) { - if (mi_commit_mask_is_full(mask)) { - return total; - } - else if (mi_commit_mask_is_empty(mask)) { - return 0; - } - else { - size_t count = 0; - for (; mask != 0; mask >>= 1) { // todo: use popcount - if ((mask&1)!=0) count++; - } - return (total/MI_COMMIT_MASK_BITS)*count; - } -} - +size_t _mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total); #define mi_commit_mask_foreach(mask,idx,count) \ idx = 0; \ @@ -782,6 +732,7 @@ static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t idx++; \ } + // ------------------------------------------------------------------- // Fast "random" shuffle // ------------------------------------------------------------------- diff --git a/src/segment.c b/src/segment.c index 5516a626..0b9502f2 100644 --- a/src/segment.c +++ b/src/segment.c @@ -23,6 +23,64 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st be reclaimed by still running threads, much like work-stealing. -------------------------------------------------------------------------------- */ +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +static mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + if (bitcount == MI_COMMIT_MASK_BITS) { + mi_assert_internal(bitidx==0); + return mi_commit_mask_full(); + } + else if (bitcount == 0) { + return mi_commit_mask_empty(); + } + else { + return (((uintptr_t)1 << bitcount) - 1) << bitidx; + } +} + + +static bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return ((commit & mask) == mask); +} + +static bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return ((commit & mask) != 0); +} + +mi_decl_nodiscard static mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return (commit & mask); +} + +static void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { + *commit = (*commit) & (~mask); +} + +static void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { + *commit = (*commit) | mask; +} + +size_t _mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total) { + if (mi_commit_mask_is_full(mask)) { + return total; + } + else if (mi_commit_mask_is_empty(mask)) { + return 0; + } + else { + size_t count = 0; + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + return (total/MI_COMMIT_MASK_BITS)*count; + } +} + + + /* ----------------------------------------------------------- Slices ----------------------------------------------------------- */ @@ -257,7 +315,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->decommit_mask, 
segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); + const size_t csize = _mi_commit_mask_committed_size(segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os); @@ -358,7 +416,7 @@ static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conse size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 if (bitidx + bitcount > MI_INTPTR_SIZE*8) { - _mi_warning_message("commit mask overflow: %zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); + _mi_warning_message("commit mask overflow: idx=%zu count=%zu start=%zx end=%zx p=0x%p size=%zu fullsize=%zu\n", bitidx, bitcount, start, end, p, size, *full_size); } mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); @@ -375,14 +433,14 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (commit && !mi_commit_mask_all_set(segment->commit_mask, mask)) { bool is_zero = false; mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); - _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap + _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; mi_commit_mask_set(&segment->commit_mask,mask); } else if (!commit && mi_commit_mask_any_set(segment->commit_mask,mask)) { mi_assert_internal((void*)start != (void*)segment); mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); } // ok if this fails mi_commit_mask_clear(&segment->commit_mask, mask); } From 12bfd18ba7b1b275ceb0a7339063c4d1f420b5da Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 16:15:03 -0800 Subject: [PATCH 127/352] fix commit mask for huge segments --- include/mimalloc-internal.h | 2 +- src/segment.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 4e90a774..cd3b0fde 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -718,7 +718,7 @@ static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - if (cm->mask[i] != 0) return false; + if (cm->mask[i] != ~((size_t)0)) return false; } return true; } diff --git a/src/segment.c b/src/segment.c index fee51e43..36a96699 100644 --- a/src/segment.c +++ b/src/segment.c @@ -463,8 +463,9 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { mi_assert_internal(_mi_ptr_segment(p) == segment); + 
mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); mi_commit_mask_create_empty(cm); - if (size == 0 || size > MI_SEGMENT_SIZE) return; + if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; const size_t segsize = mi_segment_size(segment); if (p >= (uint8_t*)segment + segsize) return; @@ -546,6 +547,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed return mi_segment_commitx(segment,true,p,size,stats); } @@ -930,6 +932,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } else { mi_assert_internal(huge_page!=NULL); + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance } From cdfbd6d08fff0d2c9409ae40ddc15c756740227b Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 17:12:21 -0800 Subject: [PATCH 128/352] decommit when abandoned segments move to the visited list --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 0b9502f2..1c46242d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1315,7 +1315,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, false, tld->stats); // decommit if needed + mi_segment_delayed_decommit(segment, true, tld->stats); // decommit if needed mi_abandoned_visited_push(segment); } } From fb5645a30d914535ebb4721f2fd549c11f91880a Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 19:41:41 -0800 Subject: [PATCH 129/352] increase decommit hysteresis --- src/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index e8d84a19..2907ddf2 100644 --- a/src/segment.c +++ b/src/segment.c @@ -541,7 +541,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, &mask); + mi_commit_mask_clear(&segment->decommit_mask, &mask); return true; } @@ -579,11 +579,11 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + 1; // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + 5; // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired - // segment->decommit_expire++; + // segment->decommit_expire += 1; // = now + mi_option_get(mi_option_reset_delay); } } } From
fa66db840d4e2c2d4747823b1fa192bc4405c793 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 19:43:52 -0800 Subject: [PATCH 130/352] increase decommit hysteresis --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 1c46242d..03c58fdd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -483,7 +483,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + 1; // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + 5; // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired From 511a8996f3568ba59ca019067173d1242a5dd786 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 20:12:03 -0800 Subject: [PATCH 131/352] increase commit mask blocks to 2xslice size --- include/mimalloc-types.h | 7 +++---- src/segment.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index cd12418b..6b9da839 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -308,12 +308,11 @@ typedef enum mi_segment_kind_e { // A segment holds a commit mask where a bit is set if // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice -// size. We define it as equal so we can decommit on a -// slice level which helps with (real) memory fragmentation -// over time. +// size. If it is equal we have the most fine grained +// decommit but in practice 2x seems to perform better. // ------------------------------------------------------ -#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_SIZE (2*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) diff --git a/src/segment.c b/src/segment.c index 2907ddf2..30c2e22b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -583,7 +583,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } else { // previous decommit mask is not yet expired - // segment->decommit_expire += 1; // = now + mi_option_get(mi_option_reset_delay); + // segment->decommit_expire += 2; // = now + mi_option_get(mi_option_reset_delay); } } } From f039774cf5aeb7e6f536fd29d6d26e56b433c742 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14 Nov 2021 11:26:30 -0800 Subject: [PATCH 132/352] adjust decommit delay --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index c8a8b9b1..57737099 100644 --- a/src/segment.c +++ b/src/segment.c @@ -483,7 +483,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + 5; // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + (mi_option_get(mi_option_reset_delay) / 8); // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired From 32170897ddd7daf8398322659e0c2f1b99fd1547 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14
Nov 2021 11:45:28 -0800 Subject: [PATCH 133/352] make decommit size equal to slice size --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 6b9da839..772e8839 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -309,10 +309,10 @@ typedef enum mi_segment_kind_e { // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice // size. If it is equal we have the most fine grained -// decommit but in practice 2x seems to perform better. +// decommit (but in practice 2x seems to perform better). // ------------------------------------------------------ -#define MI_COMMIT_SIZE (2*MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) From 70547b5f1698358d4232258d26190952c2d5dc27 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14 Nov 2021 12:09:20 -0800 Subject: [PATCH 134/352] fix slice count --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 772e8839..068513ba 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -141,7 +141,7 @@ typedef int32_t mi_ssize_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE #define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1) #define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT) -#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 128 +#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024 #define MI_SMALL_PAGE_SIZE (MI_ZU(1)< Date: Sun, 14 Nov 2021 12:10:07 -0800 Subject: [PATCH 135/352] fix slice count comment --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 4d703f13..ce84aa97 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -141,7 +141,7 @@ typedef int32_t mi_ssize_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE #define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1) #define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT) -#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 128 +#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024 #define MI_SMALL_PAGE_SIZE (MI_ZU(1)< Date: Sun, 14 Nov 2021 14:38:24 -0800 Subject: [PATCH 136/352] fix compilation on macos --- src/os.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/os.c b/src/os.c index 169680a1..3113a098 100644 --- a/src/os.c +++ b/src/os.c @@ -472,13 +472,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro } #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) { os_tag = 100; } - fd = VM_MAKE_TAG(os_tag); - #endif + #endif // huge page allocation if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static _Atomic(size_t) large_page_try_ok; // = 0; From f412df7a2b64421e1f1d61fde6055a6ea288e8f5 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14 Nov 2021 16:51:41 -0800 Subject: [PATCH 137/352] make segment size smaller on 32-bit --- include/mimalloc-types.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 068513ba..68990626 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -128,9 +128,14 @@ typedef int32_t mi_ssize_t; // ------------------------------------------------------ // Main tuning parameters for segment and page sizes -// Sizes for 64-bit, divide by two for 32-bit -#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB +// Sizes for 64-bit (usually divide by two for 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) + +#if MI_INTPTR_SIZE > 4 #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB +#else +#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit +#endif #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB From 6efd78c5e0825b3cf502f1eacc13a8e03d4aaefe Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Nov 2021 10:52:39 -0800 Subject: [PATCH 138/352] remove O3 flag --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5a46381..8a7cc5e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,7 +178,7 @@ endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden $<$:-O3>) + list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) if(NOT MI_USE_CXX) list(APPEND mi_cflags -Wstrict-prototypes) endif() From 72a33c37ef14abc24d3a5cdbb2be806fd24cb382 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 18 Dec 2021 11:34:02 -0800 Subject: [PATCH 139/352] merge from dev --- include/mimalloc-types.h | 4 ++-- src/segment.c | 2 +- test/test-api.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9f97f8f5..957115c8 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -172,13 +172,13 @@ typedef int32_t mi_ssize_t; #endif // Maximum slice offset (15) -#define MI_MAX_SLICE_OFFSET ((MI_ALIGNED_MAX / MI_SEGMENT_SLICE_SIZE) - 1) +#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) // Used as a special value to encode block sizes in 32 bits. 
#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) // blocks up to this size are always allocated aligned -#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) +#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) diff --git a/src/segment.c b/src/segment.c index 7b2fa28e..3001f160 100644 --- a/src/segment.c +++ b/src/segment.c @@ -317,7 +317,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0); - if (page_size != NULL) *page_size = psize - start_offset; + if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } diff --git a/test/test-api.c b/test/test-api.c index 96817337..f057799a 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -163,7 +163,7 @@ int main(void) { for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { - ps[i] = mi_malloc_aligned(align/2 /*size*/, align); + ps[i] = mi_malloc_aligned(align*13 /*size*/, align); if (ps[i] == NULL || (uintptr_t)(ps[i]) % align != 0) { ok = false; } From f317225a70929fea9be62e15945c2e8890cf6a1a Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 10 Jan 2022 12:10:18 -0800 Subject: [PATCH 140/352] ignore reset_decommits option in the 2.x / dev-slice version --- include/mimalloc-internal.h | 2 +- src/options.c | 2 +- src/os.c | 15 +++++++-------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 348bef4b..45775df4 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -83,7 +83,7 @@ bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +// bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); diff --git a/src/options.c b/src/options.c index f7dbc620..b8bac750 100644 --- a/src/options.c +++ b/src/options.c @@ -72,7 +72,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else { 1, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED + { 0, UNINIT, MI_OPTION(reset_decommits) }, // legacy; ignored now and reset always uses MADV_FREE/MADV_DONTNEED (issue #518) #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages diff --git a/src/os.c b/src/os.c index 8aac3845..ac2d73d1 100644 --- a/src/os.c +++ b/src/os.c @@ -938,9 +938,12 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } +/* static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); + 
return mi_os_commitx(addr, size, true, true // conservative + , is_zero, stats); } +*/ // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -1003,14 +1006,10 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(addr, size, stats); - } - else { - return mi_os_resetx(addr, size, true, stats); - } + return mi_os_resetx(addr, size, true, stats); } +/* bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; @@ -1022,7 +1021,7 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat return mi_os_resetx(addr, size, false, stats); } } - +*/ // Protect a region in memory to be not accessible. static bool mi_os_protectx(void* addr, size_t size, bool protect) { From bd2ac3c92e3e00ec02b09081a7678478b42abf65 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Feb 2022 16:17:21 -0800 Subject: [PATCH 141/352] collect segment cache on mi_collect --- include/mimalloc-internal.h | 1 + src/heap.c | 5 ++++- src/segment-cache.c | 15 ++++++++++----- test/test-stress.c | 8 +++++--- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 45775df4..88142197 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -95,6 +95,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, // "segment-cache.c" void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); diff --git a/src/heap.c b/src/heap.c index d7975b0b..416a9a8d 100644 --- a/src/heap.c +++ b/src/heap.c @@ -147,11 +147,14 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect segment caches + // collect segment local caches if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } + // decommit in global segment caches + _mi_segment_cache_collect( collect >= MI_FORCE, &heap->tld->os); + // collect regions on program-exit (or shared library unload) if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { //_mi_mem_collect(&heap->tld->os); diff --git a/src/segment-cache.c b/src/segment-cache.c index cabdec8f..41d71e5e 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -115,13 +115,14 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo #define MI_MAX_PURGE_PER_PUSH (4) -static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) +static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) { 
MI_UNUSED(tld); mi_msecs_t now = _mi_clock_now(); - size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start size_t purged = 0; - for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); + size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); + for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); @@ -144,11 +145,15 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } - if (purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } } } +void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { + mi_segment_cache_purge(force, tld ); +} + mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE @@ -167,7 +172,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // purge expired entries - mi_segment_cache_purge(tld); + mi_segment_cache_purge(false /* force? */, tld); // find an available slot mi_bitmap_index_t bitidx; diff --git a/test/test-stress.c b/test/test-stress.c index 100c6d66..ff5fffeb 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -189,11 +189,13 @@ static void test_stress(void) { free_items(p); } } + #ifndef NDEBUG //mi_collect(false); - //mi_debug_show_arenas(); -#if !defined(NDEBUG) || defined(MI_TSAN) + //mi_debug_show_arenas(); + #endif + #if !defined(NDEBUG) || defined(MI_TSAN) if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } -#endif + #endif } } From ccfe00573172ce40c715629a8ed4691149227407 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Feb 2022 17:08:05 -0800 Subject: [PATCH 142/352] decommit in abandoned pages on mi_collect --- include/mimalloc-internal.h | 1 + src/heap.c | 3 +++ src/segment.c | 31 +++++++++++++++++++++++++++---- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 88142197..8e8b5c9c 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -110,6 +110,7 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); void _mi_abandoned_await_readers(void); +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); diff --git a/src/heap.c b/src/heap.c index 416a9a8d..b0cae474 100644 --- a/src/heap.c +++ b/src/heap.c @@ -147,6 +147,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); + // collect abandoned pages + 
_mi_abandoned_collect(heap, collect >= MI_FORCE, &heap->tld->segments); + // collect segment local caches if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); diff --git a/src/segment.c b/src/segment.c index 3001f160..980ca439 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1050,7 +1050,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) Abandonment When threads terminate, they can leave segments with -live blocks (reached through other threads). Such segments +live blocks (reachable through other threads). Such segments are "abandoned" and will be reclaimed by other threads to reuse their pages and/or free them eventually @@ -1065,11 +1065,11 @@ or decommitting segments that have a pending read operation. Note: the current implementation is one possible design; another way might be to keep track of abandoned segments -in the regions. This would have the advantage of keeping +in the arenas/segment_cache's. This would have the advantage of keeping all concurrent code in one place and not needing to deal with ABA issues. The drawback is that it is unclear how to scan abandoned segments efficiently in that case as they -would be spread among all other segments in the regions. +would be spread among all other segments in the arenas. ----------------------------------------------------------- */ // Use the bottom 20-bits (on 64-bit) of the aligned segment pointers @@ -1431,7 +1431,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true, tld->stats); // decommit if needed + mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed mi_abandoned_visited_push(segment); } } @@ -1439,6 +1439,29 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) +{ + mi_segment_t* segment; + int max_tries = (force ? 16*1024 : 1024); // limit latency + if (force) { + mi_abandoned_visited_revisit(); + } + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) + if (segment->used == 0) { + // free the segment (by forced reclaim) to make it available to other threads. 
+ // note: we could in principle optimize this by skipping reclaim and directly + // freeing but that would violate some invariants temporarily) + mi_segment_reclaim(segment, heap, 0, NULL, tld); + } + else { + // otherwise, decommit if needed and push on the visited list + mi_segment_delayed_decommit(segment, force, tld->stats); // forced decommit if needed + mi_abandoned_visited_push(segment); + } + } +} + /* ----------------------------------------------------------- Reclaim or allocate ----------------------------------------------------------- */ From 932f8661053fd5b6325f2758119757b2662c11a8 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Feb 2022 18:28:02 -0800 Subject: [PATCH 143/352] decommit segment cache on force collect --- src/segment-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 41d71e5e..9b838cef 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -126,14 +126,14 @@ static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && now >= expire) { // racy read + if (expire != 0 && (force || now >= expire)) { // racy read // seems expired, first claim it from available purged++; mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // was available, we claimed it expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && now >= expire) { // safe read + if (expire != 0 && (force || now >= expire)) { // safe read // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); From 4e65b5018fd95df4f8ddcce53144bdb7e4d59b20 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Feb 2022 19:01:41 -0800 Subject: [PATCH 144/352] clean up options --- doc/mimalloc-doc.h | 5 ++--- include/mimalloc.h | 19 ++++++++-------- src/options.c | 56 +++++++++++++++++++++++++--------------------- src/segment.c | 16 ++++++------- 4 files changed, 51 insertions(+), 45 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 4cf8c2c3..6078f415 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -811,9 +811,8 @@ typedef enum mi_option_e { mi_option_segment_cache, ///< The number of segments per thread to keep cached. mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free. mi_option_segment_reset, ///< Experimental - mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default) + mi_option_decommit_delay, ///< Delay in milli-seconds before decommitting currently unused reserved memory (25ms by default) mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes - mi_option_reset_decommits, ///< Experimental mi_option_eager_commit_delay, ///< Experimental mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory _mi_option_last @@ -1068,7 +1067,7 @@ or via environment variables. - `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages when not in use to signal to the OS that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) programs. 
By setting it to `0` no such page resets will be done which can improve performance for programs that are not long - running. As an alternative, the `MIMALLOC_RESET_DELAY=` can be set higher (100ms by default) to make the page + running. As an alternative, the `MIMALLOC_DECOMMIT_DELAY=` can be set higher (100ms by default) to make the page reset occur less frequently instead of turning it off completely. - `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs diff --git a/include/mimalloc.h b/include/mimalloc.h index 98689d28..650948ea 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -306,27 +306,28 @@ typedef enum mi_option_e { mi_option_show_errors, mi_option_show_stats, mi_option_verbose, - // the following options are experimental + // some of the following options are experimental + // (deprecated options are kept for binary backward compatibility with v1.x versions) mi_option_eager_commit, - mi_option_eager_region_commit, - mi_option_reset_decommits, - mi_option_large_os_pages, // implies eager commit + mi_option_deprecated_eager_region_commit, + mi_option_deprecated_reset_decommits, + mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_reserve_huge_os_pages_at, mi_option_reserve_os_memory, mi_option_segment_cache, mi_option_page_reset, - mi_option_abandoned_page_reset, - mi_option_segment_reset, + mi_option_abandoned_page_decommit, + mi_option_deprecated_segment_reset, mi_option_eager_commit_delay, - mi_option_allow_decommit, - mi_option_reset_delay, - mi_option_segment_decommit_delay, + mi_option_decommit_delay, mi_option_use_numa_nodes, mi_option_limit_os_alloc, mi_option_os_tag, mi_option_max_errors, mi_option_max_warnings, + mi_option_allow_decommit, + mi_option_segment_decommit_delay, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index b8bac750..388be2e6 100644 --- a/src/options.c +++ b/src/options.c @@ -49,54 +49,50 @@ typedef struct mi_option_desc_s { mi_init_t init; // is it initialized yet? (from the environment) mi_option_t option; // for debugging: the option index should match the option const char* name; // option name without `mimalloc_` prefix + const char* legacy_name; // potential legacy v1.x option name } mi_option_desc_t; -#define MI_OPTION(opt) mi_option_##opt, #opt -#define MI_OPTION_DESC(opt) {0, UNINIT, MI_OPTION(opt) } +#define MI_OPTION(opt) mi_option_##opt, #opt, NULL +#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy static mi_option_desc_t options[_mi_option_last] = { // stable options -#if MI_DEBUG || defined(MI_SHOW_ERRORS) + #if MI_DEBUG || defined(MI_SHOW_ERRORS) { 1, UNINIT, MI_OPTION(show_errors) }, -#else + #else { 0, UNINIT, MI_OPTION(show_errors) }, -#endif + #endif { 0, UNINIT, MI_OPTION(show_stats) }, { 0, UNINIT, MI_OPTION(verbose) }, - // the following options are experimental and not all combinations make sense. + // Some of the following options are experimental and not all combinations are valid. Use with care. { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
- { 0, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory - #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // legacy; ignored now and reset always uses MADV_FREE/MADV_DONTNEED (issue #518) - #endif + { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, + { 0, UNINIT, MI_OPTION(deprecated_reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates - { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) -#if defined(__NetBSD__) + { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, + #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed -#elif defined(_WIN32) + #elif defined(_WIN32) { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) -#else + #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) -#endif - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 25, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments + #endif + { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output - { 16, UNINIT, MI_OPTION(max_warnings) } // maximum warnings that are output + { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) + { 500, UNINIT, MI_OPTION(segment_decommit_delay) } // decommit delay in milli-seconds for freed segments }; static void mi_option_init(mi_option_desc_t* desc); @@ -526,11 +522,21 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment + char s[64+1]; char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); mi_strlcat(buf, desc->name, sizeof(buf)); - char s[64+1]; - if (mi_getenv(buf, s, sizeof(s))) { + bool found = mi_getenv(buf,s,sizeof(s)); + if (!found && desc->legacy_name != NULL) { + mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + found = mi_getenv(buf,s,sizeof(s)); + if (found) { + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name ); + } + } + + if (found) { size_t len = strlen(s); if (len >= sizeof(buf)) len = sizeof(buf) - 1; for (size_t i = 0; i < len; i++) { diff --git a/src/segment.c b/src/segment.c index 980ca439..94c2f184 100644 --- a/src/segment.c +++ b/src/segment.c @@ -538,7 +538,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } // increase expiration of reusing part of the delayed decommit if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); } // always undo delayed decommits mi_commit_mask_clear(&segment->decommit_mask, &mask); @@ -554,7 +554,7 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_decommit) return; - if (mi_option_get(mi_option_reset_delay) == 0) { + if (mi_option_get(mi_option_decommit_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } else { @@ -569,21 +569,21 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); - segment->decommit_expire = now + mi_option_get(mi_option_reset_delay); + segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); } else if (segment->decommit_expire <= now) { // previous decommit mask already expired // 
mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + (mi_option_get(mi_option_reset_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired - // segment->decommit_expire += 2; // = now + mi_option_get(mi_option_reset_delay); + // segment->decommit_expire += 2; // = now + mi_option_get(mi_option_decommit_delay); } } } @@ -877,7 +877,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); if (segment->allow_decommit) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); segment->decommit_mask = decommit_mask; mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); #if MI_DEBUG>2 @@ -1245,7 +1245,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { } // perform delayed decommits - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset) /* force? */, tld->stats); + mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); From 741d39a0042b48793471f1e9e9217f9efe82efa2 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Feb 2022 14:26:56 -0800 Subject: [PATCH 145/352] fix over aggressive decommit of abandoned pages --- src/heap.c | 31 ++++++++++++++++++------------- src/segment.c | 11 ++++++----- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/heap.c b/src/heap.c index b0cae474..4fdfb0b9 100644 --- a/src/heap.c +++ b/src/heap.c @@ -115,17 +115,20 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (heap==NULL || !mi_heap_is_initialized(heap)) return; - _mi_deferred_free(heap, collect >= MI_FORCE); + + const bool force = collect >= MI_FORCE; + _mi_deferred_free(heap, force); // note: never reclaim on collect but leave it to threads that need storage to reclaim - if ( - #ifdef NDEBUG + const bool force_main = + #ifdef NDEBUG collect == MI_FORCE - #else + #else collect >= MI_FORCE - #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim) - { + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim; + + if (force_main) { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. 
_mi_abandoned_reclaim_all(heap, &heap->tld->segments); @@ -141,25 +144,27 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_heap_delayed_free(heap); // collect retired pages - _mi_heap_collect_retired(heap, collect >= MI_FORCE); + _mi_heap_collect_retired(heap, force); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect abandoned pages - _mi_abandoned_collect(heap, collect >= MI_FORCE, &heap->tld->segments); + // collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list) + // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment + _mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments); // collect segment local caches - if (collect >= MI_FORCE) { + if (force) { _mi_segment_thread_collect(&heap->tld->segments); } // decommit in global segment caches - _mi_segment_cache_collect( collect >= MI_FORCE, &heap->tld->os); + // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment + _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); // collect regions on program-exit (or shared library unload) - if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { //_mi_mem_collect(&heap->tld->os); } } diff --git a/src/segment.c b/src/segment.c index 94c2f184..037b1316 100644 --- a/src/segment.c +++ b/src/segment.c @@ -569,7 +569,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + // segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now @@ -582,8 +582,8 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ segment->decommit_expire = now + (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's } else { - // previous decommit mask is not yet expired - // segment->decommit_expire += 2; // = now + mi_option_get(mi_option_decommit_delay); + // previous decommit mask is not yet expired, increase the expiration by a bit. + segment->decommit_expire += (mi_option_get(mi_option_decommit_delay) / 8); } } } @@ -1431,7 +1431,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed + mi_segment_delayed_decommit(segment, true /* force? 
*/, tld->stats); // forced decommit if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1456,7 +1456,8 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) } else { // otherwise, decommit if needed and push on the visited list - mi_segment_delayed_decommit(segment, force, tld->stats); // forced decommit if needed + // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. + mi_segment_delayed_decommit(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } From 0e1beb0018f45ab13bdfd567f67d7deecd08084f Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 3 Feb 2022 15:51:27 -0800 Subject: [PATCH 146/352] check for decommit allowed before purging the segment cache --- src/segment-cache.c | 1 + src/segment.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 9b838cef..93908c8f 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -118,6 +118,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); + if (!mi_option_is_enabled(mi_option_allow_decommit)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); diff --git a/src/segment.c b/src/segment.c index 037b1316..e9d30510 100644 --- a/src/segment.c +++ b/src/segment.c @@ -569,8 +569,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); - // segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - mi_msecs_t now = _mi_clock_now(); + mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); From fb418831dfffaf9f89dce9f0793294995d839a1e Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 4 Feb 2022 16:10:51 -0800 Subject: [PATCH 147/352] only delay eager commit after the first thread --- src/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index e9d30510..9f474ca5 100644 --- a/src/segment.c +++ b/src/segment.c @@ -578,11 +578,11 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired, increase the expiration by a bit. 
- segment->decommit_expire += (mi_option_get(mi_option_decommit_delay) / 8); + segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); } } } @@ -809,7 +809,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay = (!_mi_os_has_overcommit() && // never delay on overcommit systems - _mi_current_thread_count() > 2 && // do not delay for the first N threads + _mi_current_thread_count() > 1 && // do not delay for the first N threads tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); From 0e2df71829597295c94426539d00c08414d2725b Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 4 Feb 2022 16:11:38 -0800 Subject: [PATCH 148/352] increase minimal commit size to 8*slice-size and add decommit_extend_delay as option --- include/mimalloc-types.h | 4 ++-- include/mimalloc.h | 1 + src/options.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 41364286..8cd3a4c3 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -320,10 +320,10 @@ typedef enum mi_segment_kind_e { // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice // size. If it is equal we have the most fine grained -// decommit (but in practice 2x seems to perform better). +// decommit but setting it higher can be more efficient. // ------------------------------------------------------ -#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_SIZE (8*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) diff --git a/include/mimalloc.h b/include/mimalloc.h index 06597e9a..08805845 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -328,6 +328,7 @@ typedef enum mi_option_e { mi_option_max_warnings, mi_option_allow_decommit, mi_option_segment_decommit_delay, + mi_option_decommit_extend_delay, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 388be2e6..d2e61218 100644 --- a/src/options.c +++ b/src/options.c @@ -92,7 +92,8 @@ static mi_option_desc_t options[_mi_option_last] = { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) } // decommit delay in milli-seconds for freed segments + { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments + { 2, UNINIT, MI_OPTION(decommit_extend_delay) } }; static void mi_option_init(mi_option_desc_t* desc); From e11100a13780297d7016eba0fcf541c85f60c16b Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 10:57:15 -0800 Subject: [PATCH 149/352] add minimal commit size for increased efficiency (decommit fine grained, commit coarse grained) --- include/mimalloc-types.h | 9 +++++++-- src/segment.c | 7 +++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 
8cd3a4c3..63549792 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -320,10 +320,15 @@ typedef enum mi_segment_kind_e { // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice // size. If it is equal we have the most fine grained -// decommit but setting it higher can be more efficient. +// decommit (but setting it higher can be more efficient). +// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will +// be committed in one go which can be set higher than +// MI_COMMIT_SIZE for efficiency (while the decommit mask +// is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_COMMIT_SIZE (8*MI_SEGMENT_SLICE_SIZE) +#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) diff --git a/src/segment.c b/src/segment.c index e8c80d29..0970046d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -470,17 +470,20 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin if (p >= (uint8_t*)segment + segsize) return; size_t diff = (p - (uint8_t*)segment); + mi_assert_internal(diff + size <= segsize); + size_t start; size_t end; if (conservative) { + // decommit conservative start = _mi_align_up(diff, MI_COMMIT_SIZE); end = _mi_align_down(diff + size, MI_COMMIT_SIZE); } else { + // commit liberal start = _mi_align_down(diff, MI_COMMIT_SIZE); - end = _mi_align_up(diff + size, MI_COMMIT_SIZE); + end = _mi_align_up(diff + size, MI_MINIMAL_COMMIT_SIZE); } - mi_assert_internal(end <= segsize); if (end > segsize) { end = segsize; } From 8ec83f6945133e299290354ca74f65e906c8b163 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 11:21:47 -0800 Subject: [PATCH 150/352] increase min commit to 2 mib --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 63549792..310fb92b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -327,8 +327,8 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB -#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) +#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) From 47f8caad4db06314d080b798f29b91e25dd51e76 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 17:23:28 -0800 Subject: [PATCH 151/352] improve commit chunk alignment --- src/segment.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/segment.c b/src/segment.c index 0970046d..b000e641 100644 --- a/src/segment.c +++ b/src/segment.c @@ -466,28 +466,35 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); mi_commit_mask_create_empty(cm); if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; + const size_t 
segstart = mi_segment_info_size(segment); const size_t segsize = mi_segment_size(segment); if (p >= (uint8_t*)segment + segsize) return; - size_t diff = (p - (uint8_t*)segment); - mi_assert_internal(diff + size <= segsize); + size_t pstart = (p - (uint8_t*)segment); + mi_assert_internal(pstart + size <= segsize); size_t start; size_t end; if (conservative) { // decommit conservative - start = _mi_align_up(diff, MI_COMMIT_SIZE); - end = _mi_align_down(diff + size, MI_COMMIT_SIZE); + start = _mi_align_up(pstart, MI_COMMIT_SIZE); + end = _mi_align_down(pstart + size, MI_COMMIT_SIZE); + mi_assert_internal(start >= segstart); + mi_assert_internal(end <= segsize); } else { // commit liberal - start = _mi_align_down(diff, MI_COMMIT_SIZE); - end = _mi_align_up(diff + size, MI_MINIMAL_COMMIT_SIZE); + start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); + end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); + } + if (start < segstart) { + start = segstart; } if (end > segsize) { end = segsize; } + mi_assert_internal(start <= pstart && (pstart + size) <= end); mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); *start_p = (uint8_t*)segment + start; *full_size = (end > start ? end - start : 0); @@ -504,14 +511,19 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_commit_mask_create(bitidx, bitcount, cm); } -#define MI_COMMIT_SIZE_BATCH MiB static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - //if (commit && size < MI_COMMIT_SIZE_BATCH && p + MI_COMMIT_SIZE_BATCH <= mi_segment_end(segment)) { - // size = MI_COMMIT_SIZE_BATCH; - // } + // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes. 
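Editor's note: since the liberal-commit / conservative-decommit rounding in mi_segment_commit_mask is subtle, here is a small self-contained sketch of the idea (illustrative constants and invented names, not the mimalloc code itself). A decommit request is shrunk inward to whole MI_COMMIT_SIZE chunks, a commit request is grown outward to MI_MINIMAL_COMMIT_SIZE batches, and the result is clamped so it never covers the segment info area or runs past the segment end:

#include <stdio.h>
#include <stddef.h>

#define COMMIT_SIZE          (64*1024)       /* fine-grained (de)commit chunk -- illustrative value */
#define MINIMAL_COMMIT_SIZE  (2*1024*1024)   /* coarse minimal commit batch   -- illustrative value */

static size_t align_down(size_t x, size_t a) { return (x / a) * a; }
static size_t align_up  (size_t x, size_t a) { return align_down(x + a - 1, a); }

/* compute the byte range [start,end) of a segment that is actually (de)committed
   for a request covering [offset, offset+size); `conservative` is used for decommit */
static void commit_range(size_t offset, size_t size, size_t seg_size, size_t info_size,
                         int conservative, size_t* start, size_t* end) {
  if (conservative) {   /* decommit: shrink inward so only fully covered chunks are released */
    *start = align_up(offset, COMMIT_SIZE);
    *end   = align_down(offset + size, COMMIT_SIZE);
  } else {              /* commit: expand outward so commits happen in coarse batches */
    *start = align_down(offset, MINIMAL_COMMIT_SIZE);
    *end   = align_up(offset + size, MINIMAL_COMMIT_SIZE);
  }
  if (*start < info_size) *start = info_size;  /* never touch the segment info area */
  if (*end > seg_size)    *end   = seg_size;   /* never run past the segment end */
}

int main(void) {
  size_t s, e;
  commit_range(70*1024, 100*1024, 8*1024*1024, 64*1024, 0, &s, &e);
  printf("commit   [%zu,%zu)\n", s, e);   /* expands to [64*1024, 2 MiB) after clamping */
  commit_range(70*1024, 100*1024, 8*1024*1024, 64*1024, 1, &s, &e);
  printf("decommit [%zu,%zu)\n", s, e);   /* shrinks to an empty range: nothing is decommitted */
  return 0;
}

The asymmetry is the whole point of these patches: committing too much only costs address-space bookkeeping, while decommitting a partially used chunk would throw away live data, so commit rounds outward and decommit rounds inward.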
+ /* + if (commit && size > 0) { + const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE); + if (p + csize <= mi_segment_end(segment)) { + size = csize; + } + } + */ // commit liberal, but decommit conservative uint8_t* start = NULL; size_t full_size = 0; @@ -569,13 +581,13 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit + mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); } else if (segment->decommit_expire <= now) { @@ -609,7 +621,8 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st mi_segment_commitx(segment, false, p, size, stats); } } - mi_commit_mask_foreach_end() + mi_commit_mask_foreach_end() + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); } @@ -893,6 +906,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ mi_assert_internal(mi_commit_mask_is_empty(&decommit_mask)); segment->decommit_expire = 0; mi_commit_mask_create_empty( &segment->decommit_mask ); + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); } } From f2b6938d64d555f2053612da2e84fcb128bd9116 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 17:36:14 -0800 Subject: [PATCH 152/352] fix start adjustment for the commit mask --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index b000e641..c4cf9875 100644 --- a/src/segment.c +++ b/src/segment.c @@ -487,7 +487,7 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); } - if (start < segstart) { + if (pstart >= segstart && start < segstart) { // note: the mask is also calculated for an initial commit of the info area start = segstart; } if (end > segsize) { From e87b1d2298313f2ec47da0d76dbfc195742126fc Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 10 Feb 2022 11:08:13 -0800 Subject: [PATCH 153/352] add extra huge allocation test --- test/main-override-static.c | 351 +++++++++++++++++++----------------- 1 file changed, 182 insertions(+), 169 deletions(-) diff --git a/test/main-override-static.c b/test/main-override-static.c index afb9131e..07116503 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -8,172 +8,6 @@ #include // redefines malloc etc. -#include -#include - -#define MI_INTPTR_SIZE 8 -#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) - -#define MI_BIN_HUGE 100 -//#define MI_ALIGN2W - -// Bit scan reverse: return the index of the highest bit. 
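Editor's note: the delayed-decommit bookkeeping used by mi_segment_perhaps_decommit and mi_segment_delayed_decommit above records what to decommit in a mask, stamps an expiration time, and extends that deadline by mi_option_decommit_extend_delay as more ranges arrive. The following simplified sketch uses invented names (pending_t, now_msecs, os_decommit) and approximates the timing policy; it is not the exact branch structure of segment.c:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <time.h>

typedef struct pending_s {
  uint64_t mask;    /* one bit per commit chunk scheduled for decommit */
  int64_t  expire;  /* absolute time in msecs when the pending chunks may be decommitted */
} pending_t;

static int64_t now_msecs(void) { return (int64_t)time(NULL) * 1000; }
static void os_decommit(uint64_t mask) { printf("decommitting mask 0x%llx\n", (unsigned long long)mask); }

/* schedule more chunks for decommit, delaying the actual OS call */
static void schedule_decommit(pending_t* p, uint64_t mask, int64_t delay, int64_t extend_delay) {
  p->mask |= mask;
  const int64_t now = now_msecs();
  if (p->expire == 0)        p->expire = now + delay;         /* first pending decommit */
  else if (p->expire <= now) p->expire = now + extend_delay;  /* already expired: do it soon */
  else                       p->expire += extend_delay;       /* push the deadline out a little */
}

/* called regularly, and with force=true when the segment is freed */
static void flush_decommit(pending_t* p, bool force) {
  if (p->mask == 0) return;
  if (!force && now_msecs() < p->expire) return;  /* not expired yet */
  os_decommit(p->mask);
  p->mask = 0;
  p->expire = 0;
}

int main(void) {
  pending_t p = { 0, 0 };
  schedule_decommit(&p, 0x0F, 100, 2);  /* delay plays the role of decommit_delay, 2 of extend_delay */
  flush_decommit(&p, false);            /* too early: nothing happens */
  flush_decommit(&p, true);             /* forced: decommits and clears the pending state */
  return 0;
}

Batching the work this way lets short-lived frees cancel out without a system call, while the expiration bound keeps freed memory from staying committed indefinitely.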
-static inline uint8_t mi_bsr32(uint32_t x); - -#if defined(_MSC_VER) -#include -#include -static inline uint8_t mi_bsr32(uint32_t x) { - uint32_t idx; - _BitScanReverse((DWORD*)&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline uint8_t mi_bsr32(uint32_t x) { - return (31 - __builtin_clz(x)); -} -#else -static inline uint8_t mi_bsr32(uint32_t x) { - // de Bruijn multiplication, see - static const uint8_t debruijn[32] = { - 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, - 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, - }; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return debruijn[(x*0x076be629) >> 27]; -} -#endif - -/* -// Bit scan reverse: return the index of the highest bit. -uint8_t _mi_bsr(uintptr_t x) { - if (x == 0) return 0; - #if MI_INTPTR_SIZE==8 - uint32_t hi = (x >> 32); - return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); - #elif MI_INTPTR_SIZE==4 - return mi_bsr32(x); - #else - # error "define bsr for non-32 or 64-bit platforms" - #endif -} -*/ - - -static inline size_t _mi_wsize_from_size(size_t size) { - return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); -} - -// Return the bin for a given field size. -// Returns MI_BIN_HUGE if the size is too large. -// We use `wsize` for the size in "machine word sizes", -// i.e. byte size == `wsize*sizeof(void*)`. -extern inline uint8_t _mi_bin8(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - #if defined(MI_ALIGN4W) - if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes - #endif - wsize--; - // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); - // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
- // - adjust with 3 because we use do not round the first 8 sizes - // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - } - return bin; -} - -extern inline uint8_t _mi_bin4(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - uint8_t b = mi_bsr32((uint32_t)wsize); - bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; - } - return bin; -} - -size_t _mi_binx4(size_t bsize) { - if (bsize==0) return 0; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 1) return bsize; - size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); - return bin; -} - -size_t _mi_binx8(size_t bsize) { - if (bsize<=1) return bsize; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 2) return bsize; - size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; - return bin; -} - -void mi_bins() { - //printf(" QNULL(1), /* 0 */ \\\n "); - size_t last_bin = 0; - size_t min_bsize = 0; - size_t last_bsize = 0; - for (size_t bsize = 1; bsize < 2*1024; bsize++) { - size_t size = bsize * 64 * 1024; - size_t bin = _mi_binx8(bsize); - if (bin != last_bin) { - printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); - //printf("QNULL(%6zd), ", wsize); - //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); - last_bin = bin; - min_bsize = bsize; - } - last_bsize = bsize; - } -} - static void double_free1(); static void double_free2(); static void corrupt_free(); @@ -183,7 +17,7 @@ static void test_aslr(void); static void test_process_info(void); static void test_reserved(void); static void negative_stat(void); - +static void alloc_huge(void); int main() { mi_version(); @@ -197,6 +31,7 @@ int main() { // invalid_free(); // test_reserved(); // negative_stat(); + alloc_huge(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -210,7 +45,7 @@ int main() { free(p1); free(p2); free(s); - + /* now test if override worked by allocating/freeing across the api's*/ //p1 = mi_malloc(32); //free(p1); @@ -347,4 +182,182 @@ static void negative_stat(void) { *p = 100; mi_free(p); mi_stats_print_out(NULL, NULL); -} \ No newline at end of file +} + +static void alloc_huge(void) { + void* p = mi_malloc(67108872); + mi_free(p); +} + + +// ---------------------------- +// bin size experiments +// ------------------------------ + +#if 0 +#include +#include + +#define MI_INTPTR_SIZE 8 +#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) + +#define MI_BIN_HUGE 100 +//#define MI_ALIGN2W + +// Bit scan reverse: return the index of the highest bit. 
+static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +/* +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; + #if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); + #elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); + #else + # error "define bsr for non-32 or 64-bit platforms" + #endif +} +*/ + + +static inline size_t _mi_wsize_from_size(size_t size) { + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +extern inline uint8_t _mi_bin8(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { +#if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes +#endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
+ // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + +static inline uint8_t _mi_bin4(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + } + return bin; +} + +static size_t _mi_binx4(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 1) return bsize; + size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); + return bin; +} + +static size_t _mi_binx8(size_t bsize) { + if (bsize<=1) return bsize; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 2) return bsize; + size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; + return bin; +} + +static void mi_bins(void) { + //printf(" QNULL(1), /* 0 */ \\\n "); + size_t last_bin = 0; + size_t min_bsize = 0; + size_t last_bsize = 0; + for (size_t bsize = 1; bsize < 2*1024; bsize++) { + size_t size = bsize * 64 * 1024; + size_t bin = _mi_binx8(bsize); + if (bin != last_bin) { + printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); + //printf("QNULL(%6zd), ", wsize); + //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + last_bin = bin; + min_bsize = bsize; + } + last_bsize = bsize; + } +} +#endif \ No newline at end of file From 96008c55d0add668dbb09d135f6ca18a2f6a322e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 10 Feb 2022 11:57:30 -0800 Subject: [PATCH 154/352] fix ubsan warning on huge allocations (issue #543) --- src/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index c4cf9875..8d3eebe5 100644 --- a/src/segment.c +++ b/src/segment.c @@ -762,7 +762,8 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i } // and also for the last one (if not set already) (the last one is needed for coalescing) - mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) + mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; if (last < mi_segment_slices_end(segment) && last >= slice) { last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); last->slice_count = 0; From b89b4fd18a103eda7397cc7000b5ee5eda2f3cd8 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 14 Feb 2022 16:44:33 -0800 Subject: [PATCH 155/352] fix v2.0.5 version --- cmake/mimalloc-config-version.cmake | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 76b2af6c..acbd0f70 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,12 +1,6 @@ -<<<<<<< HEAD set(mi_version_major 2) set(mi_version_minor 0) -set(mi_version_patch 4) -======= -set(mi_version_major 1) -set(mi_version_minor 7) set(mi_version_patch 5) ->>>>>>> dev set(mi_version 
${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) From 9f6cbc50eeb20a227fe1def30cb68be8e84b1c32 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 7 Apr 2022 09:48:08 -0700 Subject: [PATCH 156/352] use heap_stat_decrease when possible --- src/page.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index 94fc707d..abc73685 100644 --- a/src/page.c +++ b/src/page.c @@ -371,12 +371,12 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { const size_t bsize = mi_page_block_size(page); if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.large, bsize); + mi_heap_stat_decrease(heap, large, bsize); } else { // not strictly necessary as we never get here for a huge page mi_assert_internal(false); - _mi_stat_decrease(&heap->tld->stats.huge, bsize); + mi_heap_stat_decrease(heap, huge, bsize); } } From 332346b685808db68b97e0870cbdc82c1ba6e76d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 7 Apr 2022 10:38:31 -0700 Subject: [PATCH 157/352] remove unneeded MI_HUGE_OBJ_SIZE_MAX --- include/mimalloc-types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 310fb92b..0456884b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -156,7 +156,6 @@ typedef int32_t mi_ssize_t; #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) @@ -175,7 +174,7 @@ typedef int32_t mi_ssize_t; #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) // Used as a special value to encode block sizes in 32 bits. 
-#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) +#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) // blocks up to this size are always allocated aligned #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) From 0cda8b02d5bdaa6d23c8862729cce624d5f07964 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 7 Apr 2022 11:08:54 -0700 Subject: [PATCH 158/352] fix stats for large objects that were off by the block size padding --- src/alloc.c | 24 +++++++++++++++--------- src/page.c | 13 +------------ 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 8cf72429..58115daa 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -45,7 +45,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz #if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, normal, bsize); mi_heap_stat_counter_increase(heap, normal_count, 1); #if (MI_STAT>1) @@ -297,20 +297,26 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) + #if (MI_STAT < 2) MI_UNUSED(block); -#endif + #endif mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) + const size_t bsize = mi_page_usable_block_size(page); + #if (MI_STAT>1) const size_t usize = mi_page_usable_size_of(page, block); mi_heap_stat_decrease(heap, malloc, usize); -#endif - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + #endif + if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, normal, bsize); -#if (MI_STAT > 1) + #if (MI_STAT > 1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); -#endif + #endif + } + else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, large, bsize); + } + else { + mi_heap_stat_decrease(heap, huge, bsize); } } #else diff --git a/src/page.c b/src/page.c index abc73685..1849dc8f 100644 --- a/src/page.c +++ b/src/page.c @@ -368,17 +368,6 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_page_set_has_aligned(page, false); mi_heap_t* heap = mi_page_heap(page); - const size_t bsize = mi_page_block_size(page); - if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, large, bsize); - } - else { - // not strictly necessary as we never get here for a huge page - mi_assert_internal(false); - mi_heap_stat_decrease(heap, huge, bsize); - } - } // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) @@ -791,7 +780,7 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = (is_huge ? 
NULL : mi_page_queue(heap, block_size)); mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); if (page != NULL) { - const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding + const size_t bsize = mi_page_usable_block_size(page); // note: includes padding mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(bsize >= size); From ea0f5b8779e905736a6e10e4c5e14af3d9590d9d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 8 Apr 2022 14:52:15 -0700 Subject: [PATCH 159/352] use new MI_ATOMIC_VAR_INIT --- src/segment-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 93908c8f..aacdbc11 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_CACHE_FIELDS (16) #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit -#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) +#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) #define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes typedef struct mi_cache_slot_s { From dd929659ab4329ed3d42c423e692ab418cff1856 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 14 Apr 2022 11:28:40 -0700 Subject: [PATCH 160/352] fix wrong assertion --- src/page.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index 1849dc8f..fd6c5397 100644 --- a/src/page.c +++ b/src/page.c @@ -780,10 +780,8 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size)); mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); if (page != NULL) { - const size_t bsize = mi_page_usable_block_size(page); // note: includes padding mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(bsize >= size); - + if (pq == NULL) { // huge pages are directly abandoned mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); @@ -794,6 +792,8 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { else { mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); } + + const size_t bsize = mi_page_usable_block_size(page); // note: not `mi_page_block_size` to account for padding if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, large, bsize); mi_heap_stat_counter_increase(heap, large_count, 1); From f9416ce71c3f0cf7a907cf6426c53e5004f1cca1 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 14 Apr 2022 16:09:12 -0700 Subject: [PATCH 161/352] merge from dev --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 80feacf3..ecd28dbb 100644 --- a/src/options.c +++ b/src/options.c @@ -91,10 +91,10 @@ static mi_option_desc_t options[_mi_option_last] = { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. 
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments { 2, UNINIT, MI_OPTION(decommit_extend_delay) }, - { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. }; static void mi_option_init(mi_option_desc_t* desc); From f819dbb4e4813fab464aee16770f39f11476bfea Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 14 Apr 2022 16:12:02 -0700 Subject: [PATCH 162/352] fix trailing comma --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index ecd28dbb..b07e0e77 100644 --- a/src/options.c +++ b/src/options.c @@ -94,7 +94,7 @@ static mi_option_desc_t options[_mi_option_last] = { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 2, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 2, UNINIT, MI_OPTION(decommit_extend_delay) } }; static void mi_option_init(mi_option_desc_t* desc); From a949c9321cce0dc94a59a3f0860ca496d014912e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 19 Apr 2022 11:17:53 -0700 Subject: [PATCH 163/352] update vs2022 solution --- ide/vs2022/mimalloc-override-test.vcxproj | 8 ++++---- ide/vs2022/mimalloc-override.vcxproj | 10 +++++----- ide/vs2022/mimalloc-test-api.vcxproj | 10 +++++----- ide/vs2022/mimalloc-test-stress.vcxproj | 8 ++++---- ide/vs2022/mimalloc-test.vcxproj | 8 ++++---- ide/vs2022/mimalloc.vcxproj | 10 +++++----- 6 files changed, 27 insertions(+), 27 deletions(-) diff --git a/ide/vs2022/mimalloc-override-test.vcxproj b/ide/vs2022/mimalloc-override-test.vcxproj index a3c56f7b..7a9202f1 100644 --- a/ide/vs2022/mimalloc-override-test.vcxproj +++ b/ide/vs2022/mimalloc-override-test.vcxproj @@ -29,23 +29,23 @@ Application true - v143 + v142 Application false - v143 + v142 true Application true - v143 + v142 Application false - v143 + v142 true diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index d50c4e6a..4136e574 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -29,22 +29,22 @@ DynamicLibrary true - v143 + v142 DynamicLibrary false - v143 + v142 DynamicLibrary true - v143 + v142 DynamicLibrary false - v143 + v142 @@ -236,7 +236,6 @@ - @@ -247,6 +246,7 @@ + diff --git a/ide/vs2022/mimalloc-test-api.vcxproj b/ide/vs2022/mimalloc-test-api.vcxproj index 6023c251..812a9cb1 100644 --- a/ide/vs2022/mimalloc-test-api.vcxproj +++ b/ide/vs2022/mimalloc-test-api.vcxproj @@ -29,23 +29,23 @@ Application true - v143 + v142 Application false - v143 + v142 true Application true - v143 + v142 Application false - v143 + v142 true @@ -152,4 +152,4 @@ - \ No newline at end of file + diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index c7e820df..ef7ab357 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -29,23 +29,23 @@ Application true - v143 + v142 Application false - v143 + v142 true Application true - v143 + v142 Application false - v143 + v142 true diff --git a/ide/vs2022/mimalloc-test.vcxproj 
b/ide/vs2022/mimalloc-test.vcxproj index 506dd7d4..13af6ab4 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -29,23 +29,23 @@ Application true - v143 + v142 Application false - v143 + v142 true Application true - v143 + v142 Application false - v143 + v142 true diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 125d4050..9f967d94 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -29,23 +29,23 @@ StaticLibrary true - v143 + v142 StaticLibrary false - v143 + v142 true StaticLibrary true - v143 + v142 StaticLibrary false - v143 + v142 true @@ -225,7 +225,6 @@ - true @@ -235,6 +234,7 @@ + From a90b98a1441b5cc75e4b0f1f7ec93157cdb1ce2e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 19 Apr 2022 19:57:57 -0700 Subject: [PATCH 164/352] update to vs2022 --- ide/vs2022/mimalloc-override-test.vcxproj | 8 ++++---- ide/vs2022/mimalloc-override.vcxproj | 8 ++++---- ide/vs2022/mimalloc-test-api.vcxproj | 10 +++++----- ide/vs2022/mimalloc-test-stress.vcxproj | 8 ++++---- ide/vs2022/mimalloc-test.vcxproj | 8 ++++---- ide/vs2022/mimalloc.vcxproj | 8 ++++---- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/ide/vs2022/mimalloc-override-test.vcxproj b/ide/vs2022/mimalloc-override-test.vcxproj index 7a9202f1..a3c56f7b 100644 --- a/ide/vs2022/mimalloc-override-test.vcxproj +++ b/ide/vs2022/mimalloc-override-test.vcxproj @@ -29,23 +29,23 @@ Application true - v142 + v143 Application false - v142 + v143 true Application true - v142 + v143 Application false - v142 + v143 true diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 4136e574..f10376c7 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -29,22 +29,22 @@ DynamicLibrary true - v142 + v143 DynamicLibrary false - v142 + v143 DynamicLibrary true - v142 + v143 DynamicLibrary false - v142 + v143 diff --git a/ide/vs2022/mimalloc-test-api.vcxproj b/ide/vs2022/mimalloc-test-api.vcxproj index 812a9cb1..6023c251 100644 --- a/ide/vs2022/mimalloc-test-api.vcxproj +++ b/ide/vs2022/mimalloc-test-api.vcxproj @@ -29,23 +29,23 @@ Application true - v142 + v143 Application false - v142 + v143 true Application true - v142 + v143 Application false - v142 + v143 true @@ -152,4 +152,4 @@ - + \ No newline at end of file diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index ef7ab357..c7e820df 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -29,23 +29,23 @@ Application true - v142 + v143 Application false - v142 + v143 true Application true - v142 + v143 Application false - v142 + v143 true diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj index 13af6ab4..506dd7d4 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -29,23 +29,23 @@ Application true - v142 + v143 Application false - v142 + v143 true Application true - v142 + v143 Application false - v142 + v143 true diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index f29e0977..0a45006c 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -29,23 +29,23 @@ StaticLibrary true - v142 + v143 StaticLibrary false - v142 + v143 true StaticLibrary true - v142 + v143 StaticLibrary false - v142 + v143 true From 83d84b8703644a22839f956dbf4380fa4f272d48 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Apr 2022 09:54:24 -0700 
Subject: [PATCH 165/352] increase max alignment limit to 16MiB (issue #576) --- include/mimalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index c752ac24..c776efeb 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -166,7 +166,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. // ------------------------------------------------------------------------------------- -#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment is 1MiB +#define MI_ALIGNMENT_MAX (16*1024*1024UL) // maximum supported alignment is 16MiB mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); From c128cf69bee927a741b9293ad4298342a7110278 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 30 Oct 2022 19:47:54 -0700 Subject: [PATCH 166/352] fix alignment_max for 32-bit systems (unfortunately, we need to include stdint.h now) --- include/mimalloc.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/mimalloc.h b/include/mimalloc.h index c776efeb..eacf3977 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -95,6 +95,7 @@ terms of the MIT license. A copy of the license can be found in the file #include // size_t #include // bool +#include // INTPTR_MAX #ifdef __cplusplus extern "C" { @@ -166,7 +167,11 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
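Editor's note: because the argument order noted in this comment is a common source of confusion, a brief usage sketch of the three interfaces it mentions may help (mimalloc puts the alignment after the size; the libc interfaces put it first). The POSIX and C11 calls are shown only for contrast and are not available on every platform; the 16MiB limit is the 64-bit MI_ALIGNMENT_MAX from this patch, lowered to 1MiB on 32-bit targets by the follow-up patch:

#include <stdlib.h>
#include <stdbool.h>
#include <mimalloc.h>

int main(void) {
  void* a = mi_malloc_aligned(1000, 4096);   /* mimalloc: size first, then alignment */
  void* b = aligned_alloc(4096, 4096);       /* C11: alignment first (size a multiple of it) */
  void* c = NULL;
  if (posix_memalign(&c, 4096, 1000) != 0) c = NULL;  /* POSIX: out-pointer, alignment, size */
  /* alignments above MI_ALIGNMENT_MAX (16MiB on 64-bit, 1MiB on 32-bit) are not supported */
  mi_free(a);
  free(b);
  free(c);
  return 0;
}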
// ------------------------------------------------------------------------------------- +#if (INTPTR_MAX > INT32_MAX) #define MI_ALIGNMENT_MAX (16*1024*1024UL) // maximum supported alignment is 16MiB +#else +#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment for 32-bit systems is 1MiB +#endif mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); From 9f36808a7f898d20cbabe3c360df9e5732d620cf Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 1 Nov 2022 16:22:51 -0700 Subject: [PATCH 167/352] initial api for heaps restricted to a certain arena --- include/mimalloc-internal.h | 4 ++- include/mimalloc-types.h | 1 + include/mimalloc.h | 2 +- src/arena.c | 16 +++++----- src/bitmap.c | 19 ++++++++++++ src/bitmap.h | 4 +++ src/heap.c | 11 ++++++- src/init.c | 2 ++ src/segment-cache.c | 14 +++++++-- src/segment.c | 58 +++++++++++++++++++++---------------- test/main-override-static.c | 18 +++++++++++- 11 files changed, 109 insertions(+), 40 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index a4d0a4a9..550b6543 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -93,9 +93,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); mi_arena_id_t _mi_arena_id_none(void); +bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); @@ -142,6 +143,7 @@ uint8_t _mi_bin(size_t size); // for stats void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b55d57fa..800d9413 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -438,6 +438,7 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") _Atomic(mi_block_t*) thread_delayed_free; mi_threadid_t thread_id; // thread this heap belongs too + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two 
random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation diff --git a/include/mimalloc.h b/include/mimalloc.h index db0b06c3..3a0c790e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -287,7 +287,7 @@ mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; #if MI_MALLOC_VERSION >= 200 -mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id, bool exclusive); +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif // deprecated diff --git a/src/arena.c b/src/arena.c index fbbb0205..8de84001 100644 --- a/src/arena.c +++ b/src/arena.c @@ -97,8 +97,9 @@ mi_arena_id_t _mi_arena_id_none(void) { return 0; } -static bool mi_arena_id_suitable(mi_arena_id_t arena_id, bool exclusive, mi_arena_id_t req_arena_id) { - return (!exclusive || arena_id == req_arena_id); +static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { + return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || + (arena_id == req_arena_id)); } @@ -117,18 +118,16 @@ static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_ } static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - mi_assert_internal(arena_memid != MI_MEMID_OS); *bitmap_index = (arena_memid >> 8); mi_arena_id_t id = (int)(arena_memid & 0x7F); *arena_index = mi_arena_id_index(id); return ((arena_memid & 0x80) != 0); } -bool _mi_arena_memid_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { - mi_assert_internal(arena_memid != MI_MEMID_OS); +bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { mi_arena_id_t id = (int)(arena_memid & 0x7F); bool exclusive = ((arena_memid & 0x80) != 0); - return mi_arena_id_suitable(id, exclusive, request_arena_id); + return mi_arena_id_is_suitable(id, exclusive, request_arena_id); } static size_t mi_block_count_of_size(size_t size) { @@ -159,7 +158,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren { MI_UNUSED(arena_index); mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); - if (!mi_arena_id_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; @@ -266,7 +265,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* } // finally, fall back to the OS - if (mi_option_is_enabled(mi_option_limit_os_alloc)) { + if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } @@ -282,6 +281,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, b return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); } + /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ diff --git a/src/bitmap.c b/src/bitmap.c index 4e85d687..4fc7a1f3 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -108,6 
+108,25 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel return false; } +// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled +bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, + const size_t start_field_idx, const size_t count, + mi_bitmap_pred_fun_t pred_fun, void* pred_arg, + mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + if (pred_fun == NULL || pred_fun(*bitmap_idx, pred_arg)) { + return true; + } + // predicate returned false, unclaim and look further + _mi_bitmap_unclaim(bitmap, bitmap_fields, count, *bitmap_idx); + } + } + return false; +} + /* // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. diff --git a/src/bitmap.h b/src/bitmap.h index 7bd3106c..0c501ec1 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -72,6 +72,10 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); +// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled +typedef bool (mi_cdecl *mi_bitmap_pred_fun_t)(mi_bitmap_index_t bitmap_idx, void* pred_arg); +bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx); + // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); diff --git a/src/heap.c b/src/heap.c index bc103a01..15ca3603 100644 --- a/src/heap.c +++ b/src/heap.c @@ -200,13 +200,14 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
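Editor's note: the new _mi_bitmap_try_find_from_claim_pred follows a claim-then-verify pattern: a range is first claimed atomically, the predicate is evaluated while the range is held (here: does the cached slot's memid suit the requesting arena?), and on rejection the range is unclaimed so the search can continue. A minimal sketch of that pattern over single slots, with invented names (slot_claim, slot_release, suitable_fn) rather than the real bitmap API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

typedef bool (*suitable_fn)(size_t slot, void* arg);

/* one flag per slot: false = free, true = claimed */
static bool slot_claim(atomic_bool* slots, size_t i) {
  bool expected = false;
  return atomic_compare_exchange_strong(&slots[i], &expected, true);
}
static void slot_release(atomic_bool* slots, size_t i) {
  atomic_store(&slots[i], false);
}

/* find and claim a slot that also satisfies `pred`; returns its index or SIZE_MAX */
static size_t claim_suitable(atomic_bool* slots, size_t n, suitable_fn pred, void* arg) {
  for (size_t i = 0; i < n; i++) {
    if (!slot_claim(slots, i)) continue;          /* already taken by another thread */
    if (pred == NULL || pred(i, arg)) return i;   /* claimed and suitable: keep it */
    slot_release(slots, i);                       /* unsuitable: give it back, keep looking */
  }
  return SIZE_MAX;
}

int main(void) {
  atomic_bool slots[4] = { false, false, false, false };
  return (claim_suitable(slots, 4, NULL, NULL) == 0) ? 0 : 1;
}

Running the predicate while the slot is still held is what makes the result safe to use: a suitable slot cannot be handed to another thread between the check and the claim.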
if (heap==NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); + heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); @@ -218,6 +219,14 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { return heap; } +mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { + return mi_heap_new_in_arena(_mi_arena_id_none()); +} + +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { + return _mi_arena_memid_is_suitable(memid, heap->arena_id); +} + uintptr_t _mi_heap_random_next(mi_heap_t* heap) { return _mi_random_next(&heap->random); } diff --git a/src/init.c b/src/init.c index 66e71ef2..4f37b717 100644 --- a/src/init.c +++ b/src/init.c @@ -109,6 +109,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie + 0, // arena id { 0, 0 }, // keys { {0}, {0}, 0 }, 0, // page count @@ -149,6 +150,7 @@ mi_heap_t _mi_heap_main = { MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie + 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0x846ca68b}, {0}, 0 }, // random 0, // page count diff --git a/src/segment-cache.c b/src/segment-cache.c index eac8f843..da726716 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -39,8 +39,13 @@ static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free +static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { + mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); +} -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return NULL; @@ -60,12 +65,15 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm // find an available slot mi_bitmap_index_t bitidx = 0; bool claimed = false; + mi_arena_id_t req_arena_id = _req_arena_id; + mi_bitmap_pred_fun_t pred_fun = &mi_segment_cache_is_suitable; // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? + if (*large) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = true; } if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = false; } diff --git a/src/segment.c b/src/segment.c index 46bba9d7..2ae591fd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -721,7 +721,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i return page; } -static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segments_tld_t* tld) { +static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld); @@ -730,19 +730,23 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) { if (slice->slice_count >= slice_count) { // found one - mi_span_queue_delete(sq, slice); mi_segment_t* segment = _mi_ptr_segment(slice); - if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, tld); + if (_mi_arena_memid_is_suitable(segment->memid, req_arena_id)) { + // found a suitable page span + mi_span_queue_delete(sq, slice); + + if (slice->slice_count > slice_count) { + mi_segment_slice_split(segment, slice, slice_count, tld); + } + mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); + mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); + if (page == NULL) { + // commit failed; return NULL but first restore the slice + mi_segment_span_free_coalesce(slice, tld); + return NULL; + } + return page; } - mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); - mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); - if (page == NULL) { - // commit failed; return NULL but first restore the slice - mi_segment_span_free_coalesce(slice, tld); - return NULL; - } - return page; } } sq++; @@ -757,7 +761,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
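Editor's note: to make the suitability checks threaded through this patch easier to follow, here is a compact model of the arena memid and of the rule in mi_arena_id_is_suitable / _mi_arena_memid_is_suitable. The field layout (arena id in the low 7 bits, an exclusive flag in bit 7, the bitmap index above that) is what mi_arena_memid_indices decodes in the arena.c hunk above; everything else is a simplified sketch with invented names:

#include <stdbool.h>
#include <stddef.h>

typedef int arena_id_t;   /* 0 plays the role of _mi_arena_id_none() */

/* pack and unpack the fields of an arena memid (model of mi_arena_memid_create/_indices) */
static size_t memid_create(arena_id_t id, bool exclusive, size_t bitmap_idx) {
  return (bitmap_idx << 8) | ((size_t)id & 0x7F) | (exclusive ? 0x80 : 0);
}
static arena_id_t memid_arena(size_t memid)     { return (arena_id_t)(memid & 0x7F); }
static bool       memid_exclusive(size_t memid) { return (memid & 0x80) != 0; }

/* memory is usable by a heap if it comes from a non-exclusive arena and the heap has no
   arena restriction, or if it comes from exactly the arena the heap was created for */
static bool memid_is_suitable(size_t memid, arena_id_t req_arena_id) {
  const arena_id_t id = memid_arena(memid);
  const bool exclusive = memid_exclusive(memid);
  return ((!exclusive && req_arena_id == 0) || (id == req_arena_id));
}

int main(void) {
  size_t m = memid_create(3, true, 42);
  return (memid_is_suitable(m, 3) && !memid_is_suitable(m, 0)) ? 0 : 1;
}

Every reuse path touched in this patch (the page-span search, the segment-cache pop, abandoned-segment reclaim) applies this check via _mi_arena_memid_is_suitable(segment->memid, heap->arena_id), and the plain OS fallback is skipped when a specific arena is requested, so an arena-bound heap only ever returns memory from its own arena.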
-static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL)); mi_assert_internal((segment==NULL) || (segment!=NULL && required==0)); @@ -793,9 +797,9 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; size_t memid = 0; - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment==NULL) { - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, os_tld); + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (commit) { mi_commit_mask_create_full(&commit_mask); @@ -908,8 +912,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { - return mi_segment_init(NULL, required, tld, os_tld, huge_page); +static mi_segment_t* mi_segment_alloc(size_t required, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { + return mi_segment_init(NULL, required, req_arena_id, tld, os_tld, huge_page); } @@ -1368,6 +1372,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; + // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments + // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? + bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees) if (segment->used == 0) { // free the segment (by forced reclaim) to make it available to other threads. @@ -1377,13 +1384,13 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (has_page) { + else if (has_page && is_suitable) { // found a large enough free span, or a page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). 
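Editor's note: the reclaim policy in this loop can be read as a small decision function, sketched below with invented names (the real code also performs the actual freeing and requeueing): an empty segment is always reclaimed so its memory is freed, a suitable segment with a usable page is reclaimed into the current heap, a suitable segment that has been visited several times is reclaimed anyway to bound the abandoned queue, and everything else is pushed back for other heaps, including, as the todo above notes, segments that are unsuitable for an arena-restricted heap:

#include <stdbool.h>

typedef enum { RECLAIM_FREE, RECLAIM_USE, REQUEUE } reclaim_action_t;

/* simplified decision applied to each abandoned segment that is popped (sketch only) */
static reclaim_action_t reclaim_decision(bool segment_is_empty, bool has_free_page,
                                         bool is_suitable, int abandoned_visits) {
  if (segment_is_empty)                    return RECLAIM_FREE;  /* reclaim just to free it */
  if (has_free_page && is_suitable)        return RECLAIM_USE;   /* reclaim into this heap */
  if (abandoned_visits > 3 && is_suitable) return RECLAIM_USE;   /* bound the queue length */
  return REQUEUE;                                                /* leave it for other heaps */
}

int main(void) {
  return (reclaim_decision(false, true, true, 0) == RECLAIM_USE) ? 0 : 1;
}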
return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } - else if (segment->abandoned_visits > 3) { + else if (segment->abandoned_visits > 3 && is_suitable) { // always reclaim on 3rd visit to limit the abandoned queue length. mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1425,7 +1432,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) Reclaim or allocate ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); @@ -1443,7 +1450,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_ return segment; } // 2. otherwise allocate a fresh segment - return mi_segment_alloc(0, tld, os_tld, NULL); + return mi_segment_alloc(0, heap->arena_id, tld, os_tld, NULL); } @@ -1459,7 +1466,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); - mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, heap->arena_id, tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) { @@ -1483,10 +1490,10 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki Huge page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page = NULL; - mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); + mi_segment_t* segment = mi_segment_alloc(size,req_arena_id,tld,os_tld,&page); if (segment == NULL || page==NULL) return NULL; mi_assert_internal(segment->used==1); mi_assert_internal(mi_page_block_size(page) >= size); @@ -1536,8 +1543,9 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + page = mi_segment_huge_page_alloc(block_size,heap->arena_id,tld,os_tld); } + mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid)); mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); return page; } diff --git a/test/main-override-static.c b/test/main-override-static.c index adc07aee..70b6293c 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -19,6 +19,7 @@ static void test_reserved(void); static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); +static void test_heap_arena(void); int main() { mi_version(); @@ -33,7 +34,8 @@ int main() { // test_reserved(); // negative_stat(); // alloc_huge(); - test_heap_walk(); + // test_heap_walk(); + test_heap_arena(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -212,6 +214,20 @@ static void test_heap_walk(void) { mi_heap_visit_blocks(heap, true, &test_visit, NULL); } +static void test_heap_arena(void) { + mi_arena_id_t arena_id; + int err = mi_reserve_os_memory_ex(100 * 1024 * 1024, false /* commit */, false /* allow large */, true /* exclusive */, &arena_id); + if (err) abort(); + mi_heap_t* heap = mi_heap_new_in_arena(arena_id); + for (int i = 0; i < 500000; i++) { + void* p = mi_heap_malloc(heap, 1024); + if (p == NULL) { + printf("out of memory after %d kb (expecting about 100_000kb)\n", i); + break; + } + } +} + // ---------------------------- // bin size experiments // ------------------------------ From f859190cba4cb0812e443b9dc182a4af7aa205a3 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Nov 2022 17:05:38 -0700 Subject: [PATCH 168/352] update to v2.0.7 --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 8063afe6..f0669c84 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 0) -set(mi_version_patch 6) +set(mi_version_patch 7) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7ca819c2..32eab19e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of 
the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 206 // major + 2 digits minor +#define MI_MALLOC_VERSION 207 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 0e3d543a1391a010cca18d2935557d9ce1495a51 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 3 Nov 2022 17:11:21 -0700 Subject: [PATCH 169/352] Update readme.md --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index fe2ead69..102efc15 100644 --- a/readme.md +++ b/readme.md @@ -77,7 +77,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. -* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind] for leak testing and heap block overflow detection. Initial +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow detection. Initial support for attaching heaps to a speficic memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . * 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation From 18a4b90501cc93ad853b012a1c129d468e8358aa Mon Sep 17 00:00:00 2001 From: Ofek Lev Date: Sat, 5 Nov 2022 16:29:18 -0400 Subject: [PATCH 170/352] Fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 102efc15..58863099 100644 --- a/readme.md +++ b/readme.md @@ -73,7 +73,7 @@ Enjoy! ### Releases -Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to use reduce memory usage +Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. 
From 562efed54d36e436518fbb0d49d67e74f8a33207 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 20:36:51 -0800 Subject: [PATCH 171/352] fix full SEGMENT_SIZE internal alignment by adding one more slice entry --- include/mimalloc-internal.h | 6 +++--- include/mimalloc-types.h | 2 +- src/alloc-aligned.c | 2 +- src/segment.c | 14 +++++++++----- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 40bb1349..243a45a9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -481,11 +481,11 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + mi_assert_internal(p > segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE /* can be equal for large alignment */); - if (diff == MI_SEGMENT_SIZE) diff--; + mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx < segment->slice_entries); + mi_assert_internal(idx <= segment->slice_entries); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b5931789..b960a460 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -381,7 +381,7 @@ typedef struct mi_segment_s { mi_segment_kind_t kind; _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` - mi_slice_t slices[MI_SLICES_PER_SEGMENT]; + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 76ed0ed7..06ed5272 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -63,7 +63,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } - mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + // mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); #if MI_TRACK_ENABLED diff --git a/src/segment.c b/src/segment.c index c743f02f..f637e7a9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -277,7 +277,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } // and the last entry as well (for coalescing) const mi_slice_t* last = slice + slice->slice_count - 1; - if (last > slice && last < mi_segment_slices_end(segment)) { + if (last > slice && last <= mi_segment_slices_end(segment)) { mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); mi_assert_internal(last->slice_count == 0); mi_assert_internal(last->xblock_size == 1); @@ -709,9 +709,13 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i // and also for the last one (if not set already) (the last one is needed for coalescing) // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) - mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; - if (last < mi_segment_slices_end(segment) && last >= slice) { - last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); + size_t slice_last_index = slice_index + slice_count - 1; + if (slice_last_index >= segment->slice_entries) { + slice_last_index = segment->slice_entries; + } + mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_last_index]; + if (last <= mi_segment_slices_end(segment) && last >= slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_last_index - slice_index)); last->slice_count = 0; last->xblock_size = 1; } @@ -853,7 +857,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, siz if (!is_zero) { ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); + memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more } if (!commit_info_still_good) { From 651a99b35d3a70b764524813af4d205333866653 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 20:57:27 -0800 Subject: [PATCH 172/352] refine last slice setting for large alignments --- include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 3 +-- src/segment.c | 29 ++++++++++++++--------------- test/test-api.c | 7 ++++--- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 243a45a9..bb4f50d3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -481,7 +481,7 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - mi_assert_internal(p > segment); + mi_assert_internal(p > (void*)segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 
b960a460..0cef11da 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -174,8 +174,7 @@ typedef int32_t mi_ssize_t; #endif // Maximum slice offset (15) -// #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) -#define MI_MAX_SLICE_OFFSET ((MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE)) +#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) // Used as a special value to encode block sizes in 32 bits. #define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) diff --git a/src/segment.c b/src/segment.c index f637e7a9..0a5ac3c7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -277,7 +277,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } // and the last entry as well (for coalescing) const mi_slice_t* last = slice + slice->slice_count - 1; - if (last > slice && last <= mi_segment_slices_end(segment)) { + if (last > slice && last < mi_segment_slices_end(segment)) { mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); mi_assert_internal(last->slice_count == 0); mi_assert_internal(last->xblock_size == 1); @@ -679,7 +679,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz // Note: may still return NULL if committing the memory failed static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); - mi_slice_t* slice = &segment->slices[slice_index]; + mi_slice_t* const slice = &segment->slices[slice_index]; mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); // commit before changing the slice data @@ -700,22 +700,21 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i size_t extra = slice_count-1; if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices - slice++; - for (size_t i = 1; i <= extra; i++, slice++) { - slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); - slice->slice_count = 0; - slice->xblock_size = 1; + + mi_slice_t* slice_next = slice + 1; + for (size_t i = 1; i <= extra; i++, slice_next++) { + slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); + slice_next->slice_count = 0; + slice_next->xblock_size = 1; } - // and also for the last one (if not set already) (the last one is needed for coalescing) + // and also for the last one (if not set already) (the last one is needed for coalescing and for large alignments) // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) - size_t slice_last_index = slice_index + slice_count - 1; - if (slice_last_index >= segment->slice_entries) { - slice_last_index = segment->slice_entries; - } - mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_last_index]; - if (last <= mi_segment_slices_end(segment) && last >= slice) { - last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_last_index - slice_index)); + mi_slice_t* last = slice + slice_count - 1; + mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment); + if (last > end) last = end; + if (last > slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t) * (last - slice)); last->slice_count = 0; last->xblock_size = 1; } diff --git a/test/test-api.c b/test/test-api.c index 
312b3f1b..01ef98bd 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -177,10 +177,11 @@ int main(void) { }; CHECK_BODY("malloc-aligned9") { bool ok = true; - for (int i = 0; i < 5 && ok; i++) { + for (int i = 0; i < 8 && ok; i++) { int n = (1 << i); - void* p = mi_malloc_aligned( 2*n*MI_ALIGNMENT_MAX, n*MI_ALIGNMENT_MAX); - ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; + size_t align = n * (MI_ALIGNMENT_MAX / 8); + void* p = mi_malloc_aligned( 2*align, align); + ok = ((uintptr_t)p % align) == 0; mi_free(p); } result = ok; From 1632dd73c9254322f3d65f696195b9b7005ac445 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 21:03:23 -0800 Subject: [PATCH 173/352] remove superfluous asserts --- include/mimalloc-internal.h | 2 +- src/alloc-aligned.c | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bb4f50d3..192e14da 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -479,7 +479,7 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { return start; } -// Get the page containing the pointer +// Get the page containing the pointer (performance critical as it is called in mi_free) static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { mi_assert_internal(p > (void*)segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 06ed5272..66a26b49 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -43,12 +43,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* } oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block - if (p == NULL) return NULL; - const uintptr_t adjustx = alignment - (((uintptr_t)p + offset) & align_mask); - const mi_page_t* page = _mi_ptr_page(p); - const size_t bsize = mi_page_usable_block_size(page); - mi_assert_internal(bsize >= adjustx + size); - mi_assert_internal(true); + if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -63,8 +58,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } - // mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); #if MI_TRACK_ENABLED if (p != aligned_p) { From 96f1574faf9739ee6eca5a55df9370767a094247 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 10:51:15 -0800 Subject: [PATCH 174/352] fix huge page aligned allocation size in secure mode --- ide/vs2022/mimalloc.vcxproj | 2 +- src/segment.c | 24 ++++++++++++++++-------- test/test-api.c | 2 +- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 616c226c..9811aa55 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=4;%(PreprocessorDefinitions); + MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/src/segment.c b/src/segment.c index 0a5ac3c7..22b9ccd0 100644 --- a/src/segment.c +++ b/src/segment.c @@ -336,12 +336,14 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, siz size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; - + if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) - guardsize = page_size; - required = _mi_align_up(required, page_size); + guardsize = page_size; + if (required > 0) { + required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size; + } } if (pre_size != NULL) *pre_size = isize; @@ -802,21 +804,27 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, siz size_t memid = 0; size_t align_offset = 0; size_t alignment = MI_SEGMENT_SIZE; - size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; - + if (page_alignment > 0) { mi_assert_internal(huge_page != NULL); mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN); alignment = page_alignment; const size_t info_size = info_slices * MI_SEGMENT_SLICE_SIZE; align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN ); - segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); - segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; + const size_t extra = align_offset - info_size; + // recalculate due to potential guard pages + segment_slices = mi_segment_calculate_slices(required + extra, &pre_size, &info_slices); + //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); + //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; } - else { + const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; + + // get from cache + if (page_alignment == 0) { segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); } + // get from OS if (segment==NULL) { segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate diff --git a/test/test-api.c b/test/test-api.c index 
01ef98bd..e7f3a4ed 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -149,7 +149,7 @@ int main(void) { for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { - ps[i] = mi_malloc_aligned(align*13 /*size*/, align); + ps[i] = mi_malloc_aligned(align*5 /*size*/, align); if (ps[i] == NULL || (uintptr_t)(ps[i]) % align != 0) { ok = false; } From 29405c7d70c931c890f53f3ad80243ba83220768 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 14:53:27 -0800 Subject: [PATCH 175/352] fix initializer --- src/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 38a38913..ca48fb10 100644 --- a/src/init.c +++ b/src/init.c @@ -111,7 +111,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { 0, // cookie 0, // arena id { 0, 0 }, // keys - { {0}, {0}, 0 }, + { {0}, {0}, 0, true }, 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next @@ -152,7 +152,7 @@ mi_heap_t _mi_heap_main = { 0, // initial cookie 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) - { {0x846ca68b}, {0}, 0 }, // random + { {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap From 67439bb4e5b00f1144bf7516c75649fd29d5dd3e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 17:12:14 -0800 Subject: [PATCH 176/352] add NULL check in _mi_segment_of --- src/segment-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 34c8b029..436ce2bf 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -285,8 +285,9 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) { // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
static mi_segment_t* _mi_segment_of(const void* p) { + if (p == NULL) return NULL; mi_segment_t* segment = _mi_ptr_segment(p); - if (segment == NULL) return NULL; + mi_assert_internal(segment != NULL); size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge From ba8c0f890314d80d830599f686eaa63aafcee880 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 17:21:03 -0800 Subject: [PATCH 177/352] avoid warning for large aligned blocks on linux --- src/alloc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 2b53ac22..f951370a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -488,10 +488,16 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms #if (MI_DEBUG>0) if mi_unlikely(!mi_is_in_heap_region(p)) { - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + #if (MI_INTPTR_SIZE == 8 && defined(__linux__)) + if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640) + #else + { + #endif + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + } } } #endif From b940543cd582514b5f53bc6c317d5fcfd7f28f55 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 17 Nov 2022 18:57:45 -0800 Subject: [PATCH 178/352] experiment with smaller segment size (32MiB) and finer minimal commit (1MiB) --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 016bb684..3a4f8f6b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,7 +132,7 @@ typedef int32_t mi_ssize_t; // Main tuning parameters for segment and page sizes // Sizes for 64-bit (usually divide by two for 32-bit) -#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB @@ -324,7 +324,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (MI_MiB) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS From 82a765a255b028cf57b9ddcf95a125cdf821da87 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 18 Nov 2022 09:38:01 -0800 Subject: [PATCH 179/352] experiment with 32KiB slices and increased MI_MIN_EXTEND --- include/mimalloc-types.h | 8 ++++---- src/page.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 3a4f8f6b..88905bdc 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,16 +132,16 @@ typedef int32_t mi_ssize_t; // 
Main tuning parameters for segment and page sizes // Sizes for 64-bit (usually divide by two for 32-bit) -#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 32KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB +#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 32MiB #else #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit #endif #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB +#define MI_MEDIUM_PAGE_SHIFT ( 4 + MI_SMALL_PAGE_SHIFT) // 512KiB // Derived constants @@ -324,7 +324,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS diff --git a/src/page.c b/src/page.c index 2fa03606..cb957bf7 100644 --- a/src/page.c +++ b/src/page.c @@ -408,7 +408,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { } // Retire parameters -#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX +#define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) #define MI_RETIRE_CYCLES (8) // Retire a page with no more used blocks @@ -579,7 +579,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co #if (MI_SECURE>0) #define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many #else -#define MI_MIN_EXTEND (1) +#define MI_MIN_EXTEND (4) #endif // Extend the capacity (up to reserved) by initializing a free list From 1a7f6f376d28571432ee4d3a498680da3c9dda89 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 10:22:50 -0800 Subject: [PATCH 180/352] move threadid field --- include/mimalloc-types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index cf142748..d44ecc18 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -374,8 +374,9 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; - _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; From 3ccf849c1a901f3c6fd11ad16c089d60ace4580a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 15:02:41 -0800 Subject: [PATCH 181/352] more refined decommit extend delay --- src/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 3423bd53..25e63904 100644 --- a/src/segment.c +++ b/src/segment.c @@ -540,8 +540,12 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } else if (segment->decommit_expire <= now) { // previous decommit mask already expired - // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in 
case there is a series of free's + if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { + mi_segment_delayed_decommit(segment, true, stats); + } + else { + segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + } } else { // previous decommit mask is not yet expired, increase the expiration by a bit. From c0077471695338fb5a233a42341a052133ef0180 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 15:03:15 -0800 Subject: [PATCH 182/352] back to 64k pages but 32MiB segments and a 1MiB minimal commit size --- include/mimalloc-types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d44ecc18..399001c6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,10 +132,10 @@ typedef int32_t mi_ssize_t; // Main tuning parameters for segment and page sizes // Sizes for 64-bit (usually divide by two for 32-bit) -#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 32KiB (32KiB on 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 32MiB +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB #else #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit #endif @@ -325,7 +325,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS From 83c027c4bf591b3154a6bab02dc343ff99837387 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 18:56:56 -0800 Subject: [PATCH 183/352] fix medium page size to 512k --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 399001c6..9def491e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -141,7 +141,7 @@ typedef int32_t mi_ssize_t; #endif #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB -#define MI_MEDIUM_PAGE_SHIFT ( 4 + MI_SMALL_PAGE_SHIFT) // 512KiB +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB // Derived constants From 20880807ce275b5ed23eb124d8a4b157eb042dd0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 22:05:18 -0800 Subject: [PATCH 184/352] remove comment --- src/options.c | 2 +- src/segment.c | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/options.c b/src/options.c index 6c6f8f2f..6980a047 100644 --- a/src/options.c +++ b/src/options.c @@ -94,7 +94,7 @@ static mi_option_desc_t options[_mi_option_last] = { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. 
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 2, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees! }; diff --git a/src/segment.c b/src/segment.c index 803bb47b..55ec4615 100644 --- a/src/segment.c +++ b/src/segment.c @@ -316,7 +316,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0); + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 3*MI_MAX_ALIGN_GUARANTEE : 0); if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } @@ -463,15 +463,6 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes. - /* - if (commit && size > 0) { - const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE); - if (p + csize <= mi_segment_end(segment)) { - size = csize; - } - } - */ // commit liberal, but decommit conservative uint8_t* start = NULL; size_t full_size = 0; From 9e56567d23126d0b608487fa4f11d5b59845e50f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 23 Nov 2022 09:50:29 -0800 Subject: [PATCH 185/352] fix decommit for huge objects --- src/alloc.c | 9 ++++++--- src/segment.c | 37 ++++++++++++++++--------------------- test/main-override.cpp | 18 ++++++++++++++++-- test/test-stress.c | 2 +- 4 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index f602fdcf..ac117f17 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -395,9 +395,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc #endif } - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + } #endif // Try to put the block on either the page-local thread free list, or the heap delayed free list. 
@@ -449,7 +450,9 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + if (!mi_page_is_huge(page)) { // huge page content may be already decommitted + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + } #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; diff --git a/src/segment.c b/src/segment.c index 55ec4615..b054b975 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1522,25 +1522,23 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, #if MI_HUGE_PAGE_ABANDON segment->thread_id = 0; // huge segments are immediately abandoned #endif - - if (page_alignment > 0) { - size_t psize; - uint8_t* p = _mi_segment_page_start(segment, page, &psize); - uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)p, page_alignment); - mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); - mi_assert_internal(psize - (aligned_p - p) >= size); - if (!segment->allow_decommit) { - // decommit the part of the page that is unused; this can be quite large (close to MI_SEGMENT_SIZE) - uint8_t* decommit_start = p + sizeof(mi_block_t); // for the free list - ptrdiff_t decommit_size = aligned_p - decommit_start; - mi_segment_decommit(segment, decommit_start, decommit_size, &_mi_stats_main); - } - } + // for huge pages we initialize the xblock_size as we may // overallocate to accommodate large alignments. size_t psize; - _mi_segment_page_start(segment, page, &psize); + uint8_t* start = _mi_segment_page_start(segment, page, &psize); page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? 
MI_HUGE_BLOCK_SIZE : (uint32_t)psize); + + // decommit the part of the prefix of a page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) + if (page_alignment > 0 && segment->allow_decommit) { + uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); + mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(psize - (aligned_p - start) >= size); + uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list + ptrdiff_t decommit_size = aligned_p - decommit_start; + _mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + } + return page; } @@ -1579,13 +1577,10 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); - const size_t csize = mi_page_block_size(page) - sizeof(mi_block_t); - uint8_t* p = ( uint8_t*)block + sizeof(mi_block_t); if (segment->allow_decommit) { - mi_segment_decommit(segment, p, csize, &_mi_stats_main); - } - else { - _mi_os_reset(p, csize, &_mi_stats_main); + const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } } #endif diff --git a/test/main-override.cpp b/test/main-override.cpp index b205dc85..e12567d9 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -37,12 +37,14 @@ static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 static void strdup_test(); // issue #445 static void bench_alloc_large(void); // issue #xxx +static void heap_thread_free_huge(); static void test_stl_allocators(); int main() { mi_stats_reset(); // ignore earlier allocations - + heap_thread_free_huge(); + /* heap_thread_free_large(); heap_no_delete(); heap_late_free(); @@ -51,7 +53,7 @@ int main() { large_alloc(); tsan_numa_test(); strdup_test(); - + */ test_stl_allocators(); test_mt_shutdown(); @@ -240,6 +242,18 @@ static void heap_thread_free_large() { } } +static void heap_thread_free_huge_worker() { + mi_free(shared_p); +} + +static void heap_thread_free_huge() { + for (int i = 0; i < 100; i++) { + shared_p = mi_malloc(1024 * 1024 * 1024); + auto t1 = std::thread(heap_thread_free_large_worker); + t1.join(); + } +} + static void test_mt_shutdown() diff --git a/test/test-stress.c b/test/test-stress.c index 61171d03..b766a5ca 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -91,7 +91,7 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant + if (chance(1, r) && allow_large_objects) items *= 50000; // 0.01% giant else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge else items *= 100; // 1% large objects; } From 58d12723d6817bb9e8141bc67651ea2a76900970 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 23 Nov 2022 10:34:19 -0800 Subject: [PATCH 186/352] make mi_collect(true) actually free the segment caches --- src/segment-cache.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 26786c92..d93fd644 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -133,14 +133,14 @@ static 
mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo #define MI_MAX_PURGE_PER_PUSH (4) -static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) +static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); if (!mi_option_is_enabled(mi_option_allow_decommit)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; - const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); + const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); + size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; @@ -164,13 +164,19 @@ static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } - if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } } } void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - mi_segment_cache_purge(force, tld ); + if (force) { + // called on `mi_collect(true)` but not on thread termination + _mi_segment_cache_free_all(tld); + } + else { + mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); + } } void _mi_segment_cache_free_all(mi_os_tld_t* tld) { @@ -215,7 +221,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // purge expired entries - mi_segment_cache_purge(false /* force? */, tld); + mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); // find an available slot mi_bitmap_index_t bitidx; From 6988bbcca04e0c10db1da642676068dd89895f3d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 27 Nov 2022 12:01:56 -0800 Subject: [PATCH 187/352] fix duplicate definition (issue #652 --- test/main-override.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/test/main-override.cpp b/test/main-override.cpp index f1c9a10b..9704a760 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -254,19 +254,6 @@ static void heap_thread_free_huge() { } } -static void heap_thread_free_huge_worker() { - mi_free(shared_p); -} - -static void heap_thread_free_huge() { - for (int i = 0; i < 10; i++) { - shared_p = mi_malloc(1024 * 1024 * 1024); - auto t1 = std::thread(heap_thread_free_large_worker); - t1.join(); - } -} - - static void test_mt_shutdown() { const int threads = 5; From aea0de4777e4d59092bfaabd226175b29438c9f0 Mon Sep 17 00:00:00 2001 From: Ganesan Rajagopal Date: Sat, 3 Dec 2022 16:27:33 +0530 Subject: [PATCH 188/352] Fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 58863099..d19a4a1c 100644 --- a/readme.md +++ b/readme.md @@ -78,7 +78,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page (see [below](#performance)); please report if you observe any significant performance regression. 
* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow detection. Initial - support for attaching heaps to a speficic memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . + support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . * 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix From 0f796a56a98b224dc645cd2260b95e06139570cf Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 20 Dec 2022 18:59:55 -0800 Subject: [PATCH 189/352] fix bug where eager committed memory would be immediatedy decommitted; possible fix for issue #669 --- src/alloc.c | 4 ++-- src/segment.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 7bea69e9..b8270f1a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -959,7 +959,7 @@ static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { } -mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { void* p = mi_heap_malloc(heap,size); if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; @@ -970,7 +970,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { } -mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { size_t total; if mi_unlikely(mi_count_size_overflow(count, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc diff --git a/src/segment.c b/src/segment.c index 184197ef..5b4dbc7a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -574,7 +574,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) { } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? 
NULL : mi_span_queue_for(slice_count,tld)); @@ -594,7 +594,9 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); + if (allow_decommit) { + mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + } // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); @@ -656,12 +658,12 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } // and add the new free page - mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld); + mi_segment_span_free(segment, mi_slice_index(slice), slice_count, true, tld); return slice; } -static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_segment(slice)==segment); mi_assert_internal(slice->slice_count >= slice_count); mi_assert_internal(slice->xblock_size > 0); // no more in free queue @@ -669,7 +671,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(slice) + slice_count; size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, tld); + mi_segment_span_free(segment, next_index, next_count, allow_decommit, tld); slice->slice_count = (uint32_t)slice_count; } @@ -738,7 +740,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren mi_span_queue_delete(sq, slice); if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, tld); + mi_segment_slice_split(segment, slice, slice_count, false /* don't decommit */, tld); } mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); @@ -872,7 +874,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); if (segment->allow_decommit) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); segment->decommit_mask = decommit_mask; mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); #if MI_DEBUG>2 @@ -919,7 +921,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); } else { mi_assert_internal(huge_page!=NULL); From d1fff1119a52e15c1c3807efe1077024c39fe70e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: 
Wed, 21 Dec 2022 12:19:09 -0800 Subject: [PATCH 190/352] reorganize span free code --- src/segment.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/segment.c b/src/segment.c index 5b4dbc7a..dc98e3e7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -406,7 +406,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { /* ----------------------------------------------------------- - Span management + Commit/Decommit ranges ----------------------------------------------------------- */ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { @@ -569,6 +569,10 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st } +/* ----------------------------------------------------------- + Span free +----------------------------------------------------------- */ + static bool mi_segment_is_abandoned(mi_segment_t* segment) { return (segment->thread_id == 0); } @@ -663,17 +667,10 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } -static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { - mi_assert_internal(_mi_ptr_segment(slice)==segment); - mi_assert_internal(slice->slice_count >= slice_count); - mi_assert_internal(slice->xblock_size > 0); // no more in free queue - if (slice->slice_count <= slice_count) return; - mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - size_t next_index = mi_slice_index(slice) + slice_count; - size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, allow_decommit, tld); - slice->slice_count = (uint32_t)slice_count; -} + +/* ----------------------------------------------------------- + Page allocation +----------------------------------------------------------- */ // Note: may still return NULL if committing the memory failed static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { @@ -725,6 +722,18 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i return page; } +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_ptr_segment(slice) == segment); + mi_assert_internal(slice->slice_count >= slice_count); + mi_assert_internal(slice->xblock_size > 0); // no more in free queue + if (slice->slice_count <= slice_count) return; + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); + size_t next_index = mi_slice_index(slice) + slice_count; + size_t next_count = slice->slice_count - slice_count; + mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + slice->slice_count = (uint32_t)slice_count; +} + static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up @@ -740,7 +749,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren mi_span_queue_delete(sq, slice); if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, false /* don't decommit */, tld); + mi_segment_slice_split(segment, slice, slice_count, tld); } 
mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); From 7bb34e056c383e31ee9016ed46af42a53dceced8 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 23 Dec 2022 13:35:50 -0800 Subject: [PATCH 191/352] fix readme --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 364b974b..5f7c8867 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,7 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the run-time systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.9` (2022-12-23). +Latest release tag: `v2.0.9` (2022-12-23). Latest stable tag: `v1.7.9` (2022-12-23). mimalloc is a drop-in replacement for `malloc` and can be used in other programs From 1e4b6b734e06b9f4723826e0db375987a44d5aac Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 31 Jan 2023 16:02:35 -0800 Subject: [PATCH 192/352] fix assertion that was too strict (issue #691) --- src/segment.c | 3 ++- test/main-override.cpp | 45 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/segment.c b/src/segment.c index dc98e3e7..683e413c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -632,7 +632,8 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { - mi_assert_internal(segment->used == 1); // decreased right after this call in `mi_segment_page_clear` + // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case) + mi_assert_internal((segment->used==0 && slice->xblock_size==0) || segment->used == 1); // decreased right after this call in `mi_segment_page_clear` slice->xblock_size = 0; // mark as free anyways // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to // avoid a possible cache miss (and the segment is about to be freed) diff --git a/test/main-override.cpp b/test/main-override.cpp index 7242eb29..40787831 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -37,15 +37,16 @@ static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 static void strdup_test(); // issue #445 static void bench_alloc_large(void); // issue #xxx +static void test_large_migrate(void); // issue #691 static void heap_thread_free_huge(); static void test_stl_allocators(); int main() { - mi_stats_reset(); // ignore earlier allocations - heap_thread_free_huge(); + mi_stats_reset(); // ignore earlier allocations /* + heap_thread_free_huge(); heap_thread_free_large(); heap_no_delete(); heap_late_free(); @@ -55,8 +56,9 @@ int main() { tsan_numa_test(); strdup_test(); */ - test_stl_allocators(); - test_mt_shutdown(); + // test_stl_allocators(); + // test_mt_shutdown(); + test_large_migrate(); //fail_aslr(); bench_alloc_large(); @@ -171,6 +173,41 @@ static void test_stl_allocators() { test_stl_allocator6(); } + +// issue #691 +static char* cptr; + +static void* thread1_allocate() +{ + cptr = mi_calloc_tp(char,22085632); + return NULL; +} + +static void* thread2_free() +{ + assert(cptr); + mi_free(cptr); + cptr = NULL; + return NULL; +} + +static void test_large_migrate(void) { + auto t1 
= std::thread(thread1_allocate); + t1.join(); + auto t2 = std::thread(thread2_free); + t2.join(); + /* + pthread_t thread1, thread2; + + pthread_create(&thread1, NULL, &thread1_allocate, NULL); + pthread_join(thread1, NULL); + + pthread_create(&thread2, NULL, &thread2_free, NULL); + pthread_join(thread2, NULL); + */ + return; +} + // issue 445 static void strdup_test() { #ifdef _MSC_VER From 8be4cee4186120c32def9789c450a185c7213914 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 16 Nov 2022 18:52:40 -0800 Subject: [PATCH 193/352] change max align size to 8 --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f3af528e..a7c4d3c6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -19,7 +19,7 @@ terms of the MIT license. A copy of the license can be found in the file // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `sizeof(void*)` #ifndef MI_MAX_ALIGN_SIZE -#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) +#define MI_MAX_ALIGN_SIZE 8 // sizeof(max_align_t) #endif // ------------------------------------------------------ From 5fe4a3480ffb2aa21bf12b1e92c220ab575a582f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Feb 2023 12:21:06 -0800 Subject: [PATCH 194/352] revert default max align commit back to 16 --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index a7c4d3c6..f3af528e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -19,7 +19,7 @@ terms of the MIT license. A copy of the license can be found in the file // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `sizeof(void*)` #ifndef MI_MAX_ALIGN_SIZE -#define MI_MAX_ALIGN_SIZE 8 // sizeof(max_align_t) +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) #endif // ------------------------------------------------------ From 6f31115c7f5e62a33eb8fe99888591bc6c045173 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 22:11:42 -0800 Subject: [PATCH 195/352] fix segment defined memory for valgrind --- src/segment.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 57ba7068..2698d578 100644 --- a/src/segment.c +++ b/src/segment.c @@ -874,11 +874,18 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi if (segment == NULL) return NULL; // zero the segment info? -- not always needed as it may be zero initialized from the OS + mi_track_mem_defined(segment, offsetof(mi_segment_t, next)); // needed for valgrind mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - if (!is_zero) { + { ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more + size_t zsize = prefix + sizeof(mi_slice_t) * (segment_slices + 1); // one more + if (!is_zero) { + memset((uint8_t*)segment + ofs, 0, zsize); + } + else { + mi_track_mem_defined((uint8_t*)segment + ofs, zsize); // todo: somehow needed for valgrind? 
+ } } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed From 7ec798e19726d4314b90c61c68202457a380b1fa Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 22:54:10 -0800 Subject: [PATCH 196/352] make test-stress match the one in dev --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 8b96a5ae..69650556 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -91,7 +91,7 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 50000; // 0.01% giant + if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge else items *= 100; // 1% large objects; } From a90737a7fa445b3a1afcb899c162cf670bc473fb Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 6 Mar 2023 10:44:43 -0800 Subject: [PATCH 197/352] fix valgrind tracking for zero initialized segments --- src/segment.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/segment.c b/src/segment.c index 2698d578..78171907 100644 --- a/src/segment.c +++ b/src/segment.c @@ -796,8 +796,6 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment const size_t extra = align_offset - info_size; // recalculate due to potential guard pages *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); - //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); - //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; } const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; @@ -831,7 +829,10 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment if (!ok) return NULL; // failed to commit mi_commit_mask_set(pcommit_mask, &commit_needed_mask); } - mi_track_mem_undefined(segment,commit_needed*MI_COMMIT_SIZE); + else if (*is_zero) { + // track zero initialization for valgrind + mi_track_mem_defined(segment, commit_needed * MI_COMMIT_SIZE); + } segment->memid = memid; segment->mem_is_pinned = is_pinned; segment->mem_is_large = mem_large; @@ -874,18 +875,14 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi if (segment == NULL) return NULL; // zero the segment info? -- not always needed as it may be zero initialized from the OS - mi_track_mem_defined(segment, offsetof(mi_segment_t, next)); // needed for valgrind mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan { - ptrdiff_t ofs = offsetof(mi_segment_t, next); + ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - size_t zsize = prefix + sizeof(mi_slice_t) * (segment_slices + 1); // one more + size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more if (!is_zero) { memset((uint8_t*)segment + ofs, 0, zsize); - } - else { - mi_track_mem_defined((uint8_t*)segment + ofs, zsize); // todo: somehow needed for valgrind? 
- } + } } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed From 8fbe7aae50a959cbb5324f675dcfd8c4ff18312d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 19 Mar 2023 19:11:43 -0700 Subject: [PATCH 198/352] update process info primitive api --- src/prim/prim.c | 2 +- src/prim/prim.h | 16 +++++++++++++--- src/prim/unix/prim.c | 36 ++++++++++++++---------------------- src/prim/wasi/prim.c | 12 ++++-------- src/prim/windows/prim.c | 16 ++++++++-------- src/stats.c | 36 +++++++++++++++++++----------------- 6 files changed, 59 insertions(+), 59 deletions(-) diff --git a/src/prim/prim.c b/src/prim/prim.c index eec13c48..109ab8e8 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "windows/prim.c" // VirtualAlloc (Windows) #elif defined(__wasi__) #define MI_USE_SBRK -#include "wasi/prim.h" // memory-grow or sbrk (Wasm) +#include "wasi/prim.c" // memory-grow or sbrk (Wasm) #else #include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) #endif diff --git a/src/prim/prim.h b/src/prim/prim.h index 967c6698..3130d489 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -59,9 +59,18 @@ size_t _mi_prim_numa_node_count(void); mi_msecs_t _mi_prim_clock_now(void); // Return process information (only for statistics) -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, - size_t* current_rss, size_t* peak_rss, - size_t* current_commit, size_t* peak_commit, size_t* page_faults); +typedef struct mi_process_info_s { + mi_msecs_t elapsed; + mi_msecs_t utime; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; +} mi_process_info_t; + +void _mi_prim_process_info(mi_process_info_t* pinfo); // Default stderr output. (only for warnings etc. with verbose enabled) // msg != NULL && _mi_strlen(msg) > 0 @@ -202,6 +211,7 @@ This is inlined here as it is on the fast path for allocation functions. On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a __thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures that the storage will always be available (allocated on the thread stacks). + On some platforms though we cannot use that when overriding `malloc` since the underlying TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. 
We try to circumvent this in an efficient way: diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index d1cd4301..1040c791 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -541,19 +541,15 @@ static mi_msecs_t timeval_secs(const struct timeval* tv) { return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); + pinfo->utime = timeval_secs(&rusage.ru_utime); + pinfo->stime = timeval_secs(&rusage.ru_stime); #if !defined(__HAIKU__) - *page_faults = rusage.ru_majflt; -#endif - // estimate commit using our stats - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate + pinfo->page_faults = rusage.ru_majflt; +#endif #if defined(__HAIKU__) // Haiku does not have (yet?) a way to // get these stats per process @@ -562,19 +558,20 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current ssize_t c; get_thread_info(find_thread(0), &tid); while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; + pinfo->peak_rss += mem.ram_size; } - *page_faults = 0; + pinfo->page_faults = 0; #elif defined(__APPLE__) - *peak_rss = rusage.ru_maxrss; // BSD reports in bytes + pinfo->peak_rss = rusage.ru_maxrss; // BSD reports in bytes struct mach_task_basic_info info; mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { - *current_rss = (size_t)info.resident_size; + pinfo->current_rss = (size_t)info.resident_size; } #else - *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB + pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB #endif + // use defaults for commit } #else @@ -584,15 +581,10 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current #pragma message("define a way to get process info") #endif -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; + // use defaults + MI_UNUSED(pinfo); } #endif diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index b8ac1a1b..89c04d78 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -194,17 +194,13 @@ mi_msecs_t _mi_prim_clock_now(void) { // Process info //---------------------------------------------------------------- -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { - 
*peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; + // use defaults + MI_UNUSED(pinfo); } + //---------------------------------------------------------------- // Output //---------------------------------------------------------------- diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 2fa445a1..1ce44a10 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -428,15 +428,15 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { FILETIME ct; FILETIME ut; FILETIME st; FILETIME et; GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_msecs(&ut); - *stime = filetime_msecs(&st); + pinfo->utime = filetime_msecs(&ut); + pinfo->stime = filetime_msecs(&st); // load psapi on demand if (pGetProcessMemoryInfo == NULL) { @@ -452,11 +452,11 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current if (pGetProcessMemoryInfo != NULL) { pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); } - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; + pinfo->current_rss = (size_t)info.WorkingSetSize; + pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; + pinfo->current_commit = (size_t)info.PagefileUsage; + pinfo->peak_commit = (size_t)info.PeakPagefileUsage; + pinfo->page_faults = (size_t)info.PageFaultCount; } //---------------------------------------------------------------- diff --git a/src/stats.c b/src/stats.c index 357bebce..4bc8835c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -430,21 +430,23 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { - mi_msecs_t elapsed = _mi_clock_end(mi_process_start); - mi_msecs_t utime = 0; - mi_msecs_t stime = 0; - size_t current_rss0 = 0; - size_t peak_rss0 = 0; - size_t current_commit0 = 0; - size_t peak_commit0 = 0; - size_t page_faults0 = 0; - _mi_prim_process_info(&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); - if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); - if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); - if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? 
(size_t)stime : PTRDIFF_MAX)); - if (current_rss!=NULL) *current_rss = current_rss0; - if (peak_rss!=NULL) *peak_rss = peak_rss0; - if (current_commit!=NULL) *current_commit = current_commit0; - if (peak_commit!=NULL) *peak_commit = peak_commit0; - if (page_faults!=NULL) *page_faults = page_faults0; + mi_process_info_t pinfo = { 0 }; + pinfo.elapsed = _mi_clock_end(mi_process_start); + pinfo.utime = 0; + pinfo.stime = 0; + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + pinfo.current_rss = pinfo.current_commit; + pinfo.peak_rss = pinfo.peak_commit; + pinfo.page_faults = 0; + + _mi_prim_process_info(&pinfo); + if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = pinfo.current_rss; + if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss; + if (current_commit!=NULL) *current_commit = pinfo.current_commit; + if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit; + if (page_faults!=NULL) *page_faults = pinfo.page_faults; } From 99c9f55511ea62e80cf7dd28182799a940d4b6bd Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 19 Mar 2023 20:21:20 -0700 Subject: [PATCH 199/352] simplify primitives API --- src/os.c | 27 +++++++++++++------ src/prim/prim.h | 7 ++--- src/prim/unix/prim.c | 60 ++++++++++++++++++++++------------------- src/prim/wasi/prim.c | 19 ++++++++----- src/prim/windows/prim.c | 28 ++++++++++--------- 5 files changed, 83 insertions(+), 58 deletions(-) diff --git a/src/os.c b/src/os.c index 56f96bf2..5263af42 100644 --- a/src/os.c +++ b/src/os.c @@ -135,7 +135,10 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats MI_UNUSED(tld_stats); mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) - _mi_prim_free(addr, size); + int err = _mi_prim_free(addr, size); + if (err != 0) { + _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); + } mi_stats_t* stats = &_mi_stats_main; if (was_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); @@ -163,7 +166,11 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (!commit) allow_large = false; if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning - void* p = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large); + void* p = NULL; + int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p); + if (err != 0) { + _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); + } /* if (commit && allow_large) { p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); @@ -200,7 +207,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not aligned, free it, 
overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { mi_os_mem_free(p, size, commit, stats); - _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; @@ -357,7 +364,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ int err = _mi_prim_commit(start, csize, commit); if (err != 0) { - _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%d), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); @@ -404,7 +411,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) int err = _mi_prim_reset(start, csize); if (err != 0) { - _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, err); + _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } return (err == 0); } @@ -441,7 +448,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { */ int err = _mi_prim_protect(start,csize,protect); if (err != 0) { - _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); } return (err == 0); } @@ -516,13 +523,17 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse for (page = 0; page < pages; page++) { // allocate a page void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); - void* p = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + void* p = NULL; + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p); + if (err != 0) { + _mi_warning_message("unable to allocate huge OS page (error: %d (0x%d), address: %p, size: %zx bytes)", err, err, addr, MI_HUGE_OS_PAGE_SIZE); + } // Did we succeed at a contiguous address? if (p != addr) { // no success, issue a warning and break if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); + _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); } break; diff --git a/src/prim/prim.h b/src/prim/prim.h index 3130d489..1a4fb5d8 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -11,6 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file // note: on all primitive functions, we always get: // addr != NULL and page aligned // size > 0 and page aligned +// return value is an error code an int where 0 is success. 
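// [editor's sketch, not part of this patch] The convention above means every primitive
// returns an `int` error code (0 on success) and reports pointers through an out-parameter,
// so callers in os.c can keep a single warning path. A caller-side illustration using the
// declarations from this header (the snippet itself is illustrative only):
//
//   void* p = NULL;
//   bool  is_large = false;
//   int err = _mi_prim_alloc(size, alignment, true /*commit*/, false /*allow_large*/, &is_large, &p);
//   if (err != 0) {
//     _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx)\n", err, err, size);
//   }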
// OS memory configuration typedef struct mi_os_mem_config_s { @@ -25,13 +26,13 @@ typedef struct mi_os_mem_config_s { void _mi_prim_mem_init( mi_os_mem_config_t* config ); // Free OS memory -void _mi_prim_free(void* addr, size_t size ); +int _mi_prim_free(void* addr, size_t size ); // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. // pre: !commit => !allow_large // try_alignment >= _mi_os_page_size() and a power of 2 -void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large); +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr); // Commit memory. Returns error code or 0 on success. int _mi_prim_commit(void* addr, size_t size, bool commit); @@ -47,7 +48,7 @@ int _mi_prim_protect(void* addr, size_t size, bool protect); // pre: size > 0 and a multiple of 1GiB. // addr is either NULL or an address hint. // numa_node is either negative (don't care), or a numa node number. -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node); +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr); // Return the current NUMA node size_t _mi_prim_numa_node(void); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 1040c791..5a3ca5ab 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -96,11 +96,9 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { // free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { bool err = (munmap(addr, size) == -1); - if (err) { - _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); - } + return (err ? 
errno : 0); } @@ -118,19 +116,24 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { MI_UNUSED(try_alignment); + void* p = NULL; #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); - void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%d), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } if (p!=MAP_FAILED) return p; - // fall back to regular mmap + // fall back to regular mmap } } #elif defined(MAP_ALIGN) // Solaris if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment if (p!=MAP_FAILED) return p; // fall back to regular mmap } @@ -140,14 +143,18 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { - void* p = mmap(hint, size, protect_flags, flags, fd, 0); + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%d), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } if (p!=MAP_FAILED) return p; - // fall back to regular mmap + // fall back to regular mmap } } #endif // regular mmap - void* p = mmap(addr, size, protect_flags, flags, fd, 0); + p = mmap(addr, size, protect_flags, flags, fd, 0); if (p!=MAP_FAILED) return p; // failed to allocate return NULL; @@ -217,7 +224,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); } @@ -258,20 +265,18 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec #endif } } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); - } return p; } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
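// [editor's sketch, not part of this patch] Since `try_alignment` is only a hint, the caller
// in os.c (mi_os_mem_alloc_aligned) falls back to over-allocation when the returned pointer
// is not aligned: allocate `size + alignment`, round up, and unmap the unused head and tail
// where the platform supports partial unmapping. A minimal sketch, with `raw_alloc`/`raw_free`
// as hypothetical stand-ins for the primitive calls:
//
//   uint8_t* p = raw_alloc(size + alignment);
//   uint8_t* aligned = (uint8_t*)_mi_align_up((uintptr_t)p, alignment);
//   raw_free(p, (size_t)(aligned - p));                              // trim head (may be 0)
//   raw_free(aligned + size, alignment - (size_t)(aligned - p));     // trim tail
//   return aligned;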
-void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - return unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : errno); } @@ -379,28 +384,29 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co } #endif -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { bool is_large = true; - void* p = unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); - if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes unsigned long numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: - long err = mi_prim_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); - } + err = errno; + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%d))\n", numa_node, err, err); + } } - return p; + return (*addr != NULL ? 0 : errno); } #else -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOMEM; } #endif diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index 89c04d78..f995304f 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -27,9 +27,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { // Free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { MI_UNUSED(addr); MI_UNUSED(size); // wasi heap cannot be shrunk + return 0; } @@ -101,20 +102,23 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { } } } + /* if (p == NULL) { _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); errno = ENOMEM; return NULL; } - mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + */ + mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); return p; } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
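// [editor's sketch, not part of this patch] With sbrk/memory.grow there is no way to request
// alignment, so one way mi_prim_mem_grow can satisfy the alignment hint is to grow past the
// next aligned boundary. Roughly (current_break/grow_by are hypothetical helpers):
//
//   uintptr_t base    = current_break();
//   uintptr_t aligned = _mi_align_up(base, try_alignment);
//   if (!grow_by((aligned - base) + size)) return NULL;
//   return (void*)aligned;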
-void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { MI_UNUSED(allow_large); MI_UNUSED(commit); *is_large = false; - return mi_prim_mem_grow(size, try_alignment); + *addr = mi_prim_mem_grow(size, try_alignment); + return (*addr != NULL ? 0 : ENOMEM); } @@ -142,9 +146,10 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) { // Huge pages and NUMA nodes //--------------------------------------------- -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOSYS; } size_t _mi_prim_numa_node(void) { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 1ce44a10..1e15273a 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -156,7 +156,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) // Free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { DWORD errcode = 0; bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); if (err) { errcode = GetLastError(); } @@ -172,9 +172,7 @@ void _mi_prim_free(void* addr, size_t size ) { if (err) { errcode = GetLastError(); } } } - if (errcode != 0) { - _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); - } + return (int)errcode; } @@ -240,19 +238,18 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = win_virtual_alloc_prim(addr, size, try_alignment, flags); } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); - } + //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } return p; } -void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); int flags = MEM_RESERVE; if (commit) { flags |= MEM_COMMIT; } - return win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + return (*addr != NULL ? 
0 : (int)GetLastError()); } @@ -296,7 +293,7 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) { // Huge page allocation //--------------------------------------------- -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) +static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node) { const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; @@ -315,7 +312,7 @@ void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) params[1].Arg.ULong = (unsigned)numa_node; } SIZE_T psize = size; - void* base = addr; + void* base = hint_addr; NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); if (err == 0 && base != NULL) { return base; @@ -330,11 +327,16 @@ void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type.Type = MiMemExtendedParameterNumaNode; params[0].Arg.ULong = (unsigned)numa_node; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1); } // otherwise use regular virtual alloc on older windows - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); + return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); +} + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); + return (*addr != NULL ? 0 : (int)GetLastError()); } From 993c0a49b4196c807da226b6796a262a062ff1eb Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:06:28 -0700 Subject: [PATCH 200/352] fix includes --- src/segment-cache.c | 6 +++--- src/static.c | 7 ------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index d93fd644..4a16a18a 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -11,10 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file The full memory map of all segments is also implemented here. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" -#include "bitmap.h" // atomic bitmap +#include "./bitmap.h" // atomic bitmap //#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache diff --git a/src/static.c b/src/static.c index a71cddca..d992f4da 100644 --- a/src/static.c +++ b/src/static.c @@ -29,15 +29,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "init.c" #include "options.c" #include "os.c" -<<<<<<< HEAD -#include "page.c" -#include "prim/prim.c" -#include "random.c" -======= #include "page.c" // includes page-queue.c #include "random.c" -#include "region.c" ->>>>>>> dev-platform #include "segment.c" #include "segment-cache.c" #include "stats.c" From 90f866c5bcc77496bf19df7bc85ab8a42b6b2490 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:45:34 -0700 Subject: [PATCH 201/352] fix warnings for issues #709 --- src/segment.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/segment.c b/src/segment.c index 648116c3..1b73b19a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -959,7 +959,9 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // Remove the free pages mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); + #if MI_DEBUG>1 size_t page_count = 0; + #endif while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); @@ -967,7 +969,9 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t if (slice->xblock_size == 0 && segment->kind != MI_SEGMENT_HUGE) { mi_segment_span_remove_from_queue(slice, tld); } + #if MI_DEBUG>1 page_count++; + #endif slice = slice + slice->slice_count; } mi_assert_internal(page_count == 2); // first page is allocated by the segment itself From 4bf63300b3935245559833887277e4aa06814940 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 12:29:36 -0700 Subject: [PATCH 202/352] fix alignment issue #700 --- ide/vs2022/mimalloc.vcxproj | 2 +- src/segment.c | 4 +++- test/main-override-static.c | 9 +++++++++ test/test-api.c | 18 ++++++++++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 07a854ab..894c5030 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true Default ../../include - MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); + MI_DEBUG=0;MI_SECURE=0;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/src/segment.c b/src/segment.c index 1b73b19a..1e23bb1a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -316,7 +316,9 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 3*MI_MAX_ALIGN_GUARANTEE : 0); + // note: the offset must always be an xblock_size multiple since we assume small allocations + // are aligned (see `mi_heap_malloc_aligned`). + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 512 ? 
xblock_size : 0); if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } diff --git a/test/main-override-static.c b/test/main-override-static.c index 534c8849..5e8b7333 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -20,6 +20,7 @@ static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); static void test_heap_arena(void); +static void test_align(void); int main() { mi_version(); @@ -37,6 +38,7 @@ int main() { // alloc_huge(); // test_heap_walk(); // test_heap_arena(); + test_align(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -68,6 +70,13 @@ int main() { return 0; } +static void test_align() { + void* p = mi_malloc_aligned(256, 256); + if (((uintptr_t)p % 256) != 0) { + fprintf(stderr, "%p is not 256 alignend!\n", p); + } +} + static void invalid_free() { free((void*)0xBADBEEF); realloc((void*)0xBADBEEF,10); diff --git a/test/test-api.c b/test/test-api.c index c78e1972..1967dad7 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -212,6 +212,24 @@ int main(void) { result = mi_heap_contains_block(heap, p); mi_heap_destroy(heap); } + CHECK_BODY("malloc-aligned12") { + bool ok = true; + const size_t align = 256; + for (int j = 1; j < 1000; j++) { + void* ps[1000]; + for (int i = 0; i < 1000 && ok; i++) { + ps[i] = mi_malloc_aligned(j // size + , align); + if (ps[i] == NULL || ((uintptr_t)(ps[i]) % align) != 0) { + ok = false; + } + } + for (int i = 0; i < 1000 && ok; i++) { + mi_free(ps[i]); + } + } + result = ok; + }; CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; From c935521bf92acfa539ea5f896e0f6d12611c9d5d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 12:32:41 -0700 Subject: [PATCH 203/352] fix test and project --- ide/vs2022/mimalloc.vcxproj | 2 +- test/main-override-static.c | 2 +- test/test-api.c | 18 ------------------ 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 894c5030..07a854ab 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true Default ../../include - MI_DEBUG=0;MI_SECURE=0;%(PreprocessorDefinitions); + MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/test/main-override-static.c b/test/main-override-static.c index 5e8b7333..e71be29e 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -38,7 +38,7 @@ int main() { // alloc_huge(); // test_heap_walk(); // test_heap_arena(); - test_align(); + // test_align(); void* p1 = malloc(78); void* p2 = malloc(24); diff --git a/test/test-api.c b/test/test-api.c index 1967dad7..c78e1972 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -212,24 +212,6 @@ int main(void) { result = mi_heap_contains_block(heap, p); mi_heap_destroy(heap); } - CHECK_BODY("malloc-aligned12") { - bool ok = true; - const size_t align = 256; - for (int j = 1; j < 1000; j++) { - void* ps[1000]; - for (int i = 0; i < 1000 && ok; i++) { - ps[i] = mi_malloc_aligned(j // size - , align); - if (ps[i] == NULL || ((uintptr_t)(ps[i]) % align) != 0) { - ok = false; - } - } - for (int i = 0; i < 1000 && ok; i++) { - mi_free(ps[i]); - } - } - result = ok; - }; CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; From 
a582d760ed8266af9fab445bf3e06e65d073a6f3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 12:39:15 -0700 Subject: [PATCH 204/352] refine start offset in a page --- src/segment.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 1e23bb1a..451ef250 100644 --- a/src/segment.c +++ b/src/segment.c @@ -318,7 +318,11 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c // make the start not OS page aligned for smaller blocks to avoid page/cache effects // note: the offset must always be an xblock_size multiple since we assume small allocations // are aligned (see `mi_heap_malloc_aligned`). - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 512 ? xblock_size : 0); + size_t start_offset = 0; + if (xblock_size >= MI_INTPTR_SIZE) { + if (xblock_size <= 64) { start_offset = 3*xblock_size; } + else if (xblock_size <= 512) { start_offset = xblock_size; } + } if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } From 1cbc55f2b8baccf8225024923cc840a5cc0773e7 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 23 Mar 2023 13:05:10 -0700 Subject: [PATCH 205/352] fix initialization of decommit mask for huge pages --- src/segment.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/segment.c b/src/segment.c index 451ef250..c9525490 100644 --- a/src/segment.c +++ b/src/segment.c @@ -509,6 +509,7 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); return mi_segment_commitx(segment,true,p,size,stats); } @@ -904,6 +905,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); #endif } + else { + segment->decommit_expire = 0; + mi_commit_mask_create_empty( &segment->decommit_mask ); + } // initialize segment info const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? 
MI_SLICES_PER_SEGMENT : segment_slices); From 165b84705132bac86dc680620bd1c35f639809bc Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 23 Mar 2023 16:11:38 -0700 Subject: [PATCH 206/352] improve segment_cache assertions --- include/mimalloc/internal.h | 2 +- src/os.c | 2 +- src/page.c | 2 + src/prim/unix/prim.c | 2 +- src/segment-cache.c | 77 ++++++++++++++++++++----------------- src/segment.c | 2 +- 6 files changed, 48 insertions(+), 39 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 8c9e98a1..710b9e6f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -115,7 +115,7 @@ mi_arena_id_t _mi_arena_id_none(void); bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_cache_free_all(mi_os_tld_t* tld); diff --git a/src/os.c b/src/os.c index d6c94b11..5ac37c2e 100644 --- a/src/os.c +++ b/src/os.c @@ -364,7 +364,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ int err = _mi_prim_commit(start, csize, commit); if (err != 0) { - _mi_warning_message("cannot %s OS memory (error: %d (0x%d), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); diff --git a/src/page.c b/src/page.c index f650af31..fd1af187 100644 --- a/src/page.c +++ b/src/page.c @@ -92,8 +92,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif + #if !MI_TSAN mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); + #endif //size_t tfree_count = mi_page_list_count(page, tfree); //mi_assert_internal(tfree_count <= page->thread_freed + 1); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 0ca9bc64..e51fb6bd 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -395,7 +395,7 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, vo long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { err = errno; - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%d))\n", numa_node, err, err); + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); } } return (*addr != NULL ? 
0 : errno); diff --git a/src/segment-cache.c b/src/segment-cache.c index 4a16a18a..6f9d5fcb 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -35,8 +35,8 @@ typedef struct mi_cache_slot_s { static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 -static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! -static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; +static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! +static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { @@ -48,7 +48,8 @@ static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void mi_decl_noinline static void* mi_segment_cache_pop_ex( bool all_suitable, size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, + mi_commit_mask_t* decommit_mask, bool large_allowed, + bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE @@ -66,23 +67,28 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // find an available slot + // find an available slot and make it unavailable mi_bitmap_index_t bitidx = 0; bool claimed = false; mi_arena_id_t req_arena_id = _req_arena_id; mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - if (*large) { // large allowed? - claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); + if (large_allowed) { // large allowed? 
+ claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = true; } if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = false; } if (!claimed) return NULL; + // no longer available but still in-use + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + // found a slot mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; void* p = slot->p; @@ -95,16 +101,15 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); // mark the slot as free again - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); return p; #endif } -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large, is_pinned, is_zero, _req_arena_id, memid, tld); + return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) @@ -113,10 +118,11 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo // nothing } else if (mi_commit_mask_is_full(cmask)) { + // decommit the whole in one call _mi_os_decommit(p, total, stats); } else { - // todo: one call to decommit the whole at once? 
+ // decommit parts mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t part = total/MI_COMMIT_MASK_BITS; size_t idx; @@ -148,21 +154,25 @@ static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, if (expire != 0 && (force || now >= expire)) { // racy read // seems expired, first claim it from available purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways + // it was available, we claimed it (and made it unavailable) + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); + // we can now access it safely expire = mi_atomic_loadi64_acquire(&slot->expire); if (expire != 0 && (force || now >= expire)) { // safe read + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask)); _mi_abandoned_await_readers(); // wait until safe to decommit // decommit committed parts // TODO: instead of decommit, we could also free to the OS? mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); mi_commit_mask_create_empty(&slot->decommit_mask); } - _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop + _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } @@ -184,23 +194,20 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { mi_commit_mask_t decommit_mask; bool is_pinned; bool is_zero; + bool is_large; size_t memid; const size_t size = MI_SEGMENT_SIZE; - // iterate twice: first large pages, then regular memory - for (int i = 0; i < 2; i++) { - void* p; - do { - // keep popping and freeing the memory - bool large = (i == 0); - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, - &large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); - } + void* p; + do { + // keep popping and freeing the memory + p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, + true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); + if (p != NULL) { + size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); + if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); + } + } while (p != 
NULL); } mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) @@ -228,8 +235,8 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); if (!claimed) return false; - mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); #if MI_DEBUG>1 if (is_pinned || is_large) { mi_assert_internal(mi_commit_mask_is_full(commit_mask)); @@ -257,7 +264,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // make it available - _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx); return true; #endif } @@ -273,7 +280,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me #if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB +#define MI_MAX_ADDRESS ((size_t)40 << 40) // 20TB #else #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb #endif diff --git a/src/segment.c b/src/segment.c index c9525490..f8a5d6a0 100644 --- a/src/segment.c +++ b/src/segment.c @@ -809,7 +809,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment // get from cache? 
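// [editor's note, not part of this patch] A hedged summary of the allocation order in
// mi_segment_os_alloc: regular segments (page_alignment == 0) first try to pop a
// MI_SEGMENT_SIZE block from the segment cache, reusing its commit/decommit masks; only if
// that fails (or for aligned/huge requests) does the code fall through to the arena/OS
// allocation below, after which the info slices holding the segment metadata are committed.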
if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } // get from OS From 90600188a8624bc30e807b62dc643c0dc7e3d6e7 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 28 Mar 2023 09:58:31 -0700 Subject: [PATCH 207/352] remove superfluous prototypes --- src/arena.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/arena.c b/src/arena.c index 674df73f..18e3f2ac 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,17 +28,6 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo #include "bitmap.h" // atomic bitmap -// os.c -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); - -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); - -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); - - /* ----------------------------------------------------------- Arena allocation ----------------------------------------------------------- */ From 6dd3073a752e479a53b1b7760193103d0e83e5d6 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 28 Mar 2023 10:16:19 -0700 Subject: [PATCH 208/352] avoid caching segments in pinned arenas; happes with huge OS page reservations --- src/segment-cache.c | 17 ++++++++++++----- src/segment.c | 6 ++++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 6f9d5fcb..eeae1b50 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -216,20 +216,27 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me return false; #else - // only for normal segment blocks + // purge expired entries + mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); + + // only cache normal segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + // Also do not cache arena allocated segments that cannot be decommitted. (as arena allocation is fast) + // This is a common case with reserved huge OS pages. 
+ // + // (note: we could also allow segments that are already fully decommitted but that never happens + // as the first slice is always committed (for the segment metadata)) + if (!_mi_arena_is_os_allocated(memid) && is_pinned) return false; + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); size_t start_field = 0; if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node; if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // purge expired entries - mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - // find an available slot mi_bitmap_index_t bitidx; bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); diff --git a/src/segment.c b/src/segment.c index f8a5d6a0..dc25dbda 100644 --- a/src/segment.c +++ b/src/segment.c @@ -397,8 +397,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); + if (!segment->mem_is_pinned) { + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + if (csize > 0) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + } _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); } From c344bf5c20b7357fa84ad62d5bac7c0b8c7706fc Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 31 Mar 2023 21:18:50 -0700 Subject: [PATCH 209/352] wip: work on purgable arenas --- src/segment-cache.c | 2 +- src/segment.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index eeae1b50..58b98df3 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -16,7 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "./bitmap.h" // atomic bitmap -//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache +// #define MI_CACHE_DISABLE 1 // define to completely disable the segment cache #define MI_CACHE_FIELDS (16) #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit diff --git a/src/segment.c b/src/segment.c index 3e56d50f..8825ce52 100644 --- a/src/segment.c +++ b/src/segment.c @@ -11,7 +11,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // memset #include -#define MI_PAGE_HUGE_ALIGN (256*1024) +#define MI_USE_SEGMENT_CACHE 0 +#define MI_PAGE_HUGE_ALIGN (256*1024) static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); @@ -394,8 +395,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache - !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) +#if MI_USE_SEGMENT_CACHE + if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache + || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) +#endif { if (!segment->mem_is_pinned) { const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); @@ -809,10 +812,14 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; + #if MI_USE_SEGMENT_CACHE // get from cache? if (page_alignment == 0) { segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } + #else + MI_UNUSED(pdecommit_mask); + #endif // get from OS if (segment==NULL) { From 94a867869e98e5113ffda4bc91c2668969bb38f1 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 13:53:43 -0700 Subject: [PATCH 210/352] wip: purgeable arenas; fix asan warnings --- src/arena.c | 18 +++++++++++++----- src/os.c | 3 +++ src/prim/unix/prim.c | 3 ++- src/segment.c | 12 +++++++++--- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/arena.c b/src/arena.c index c4665a8f..c99267c8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -133,7 +133,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* { size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; @@ -189,6 +189,8 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren // no need to commit, but check if already fully committed *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } + + mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); return p; } @@ -300,7 +302,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; } - } + } } // finally, fall 
back to the OS @@ -356,10 +358,11 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, const size_t size = blocks * MI_ARENA_BLOCK_SIZE; void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); const bool decommitted = mi_os_purge(p, size, stats); + // clear the purged blocks + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap if (decommitted) { _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); - _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); } } @@ -520,14 +523,19 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - + // potentially decommit if (!arena->allow_decommit || arena->blocks_committed == NULL) { - mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) + mi_assert_internal(all_committed); } else { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); + if (!all_committed) { + // assume the entire range as no longer committed + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + } + // (delay) purge the entire range mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); } diff --git a/src/os.c b/src/os.c index 6145ccb3..8bcdbf7b 100644 --- a/src/os.c +++ b/src/os.c @@ -411,6 +411,9 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) if (err != 0) { _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } + else { + mi_track_mem_undefined(start, csize); + } return (err == 0); } diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 011ffa7c..09c76f90 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -380,7 +380,8 @@ int _mi_prim_commit(void* start, size_t size, bool commit) { } int _mi_prim_reset(void* start, size_t size) { - #if defined(MADV_FREE) + // note: disable the use of MADV_FREE since it leads to confusing stats :-( + #if 0 // defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; diff --git a/src/segment.c b/src/segment.c index 8825ce52..e4381e74 100644 --- a/src/segment.c +++ b/src/segment.c @@ -400,12 +400,18 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) #endif { - if (!segment->mem_is_pinned) { + // if not all committed, an arena may decommit the whole area, but that double counts + // the already decommitted parts; adjust for that in the stats. 
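// [Editorial worked example, not part of the patch; numbers are illustrative only]
// If a segment of `size` bytes has only `csize` bytes actually committed, the arena free
// below accounts a decommit of the full `size`, so the committed statistic is pre-increased
// here by `size - csize` to keep the net change at `-csize`. For instance, with size = 32 MiB
// and csize = 8 MiB: +24 MiB here, -32 MiB in the arena free, net -8 MiB, which matches what
// was really committed.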
+ if (!mi_commit_mask_is_full(&segment->commit_mask)) { const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + mi_assert_internal(size > csize); + if (size > csize) { + _mi_stat_increase(&_mi_stats_main.committed, size - csize); + } } _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, + mi_commit_mask_is_full(&segment->commit_mask) /* all committed? */, tld->stats); } } From f5ab38f87b692371a5aba6ce7cb942ac20022321 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 15:06:09 -0700 Subject: [PATCH 211/352] wip: use purge throughout for segments and arenas; more aggressive delays --- doc/mimalloc-doc.h | 8 +- include/mimalloc.h | 14 ++-- include/mimalloc/internal.h | 5 +- include/mimalloc/types.h | 9 ++- src/arena.c | 23 ++---- src/options.c | 16 ++-- src/os.c | 15 ++++ src/region.c | 8 +- src/segment-cache.c | 28 +++---- src/segment.c | 142 +++++++++++++++++++----------------- 10 files changed, 141 insertions(+), 127 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 815901f2..3e75243b 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -821,12 +821,12 @@ typedef enum mi_option_e { mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows) mi_option_segment_reset, ///< Experimental mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default) - mi_option_reset_decommits, ///< Experimental + mi_option_purge_decommits, ///< Experimental // v2.x specific options - mi_option_allow_decommit, ///< Enable decommitting memory (=on) - mi_option_decommit_delay, ///< Decommit page memory after N milli-seconds delay (25ms). - mi_option_segment_decommit_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms). + mi_option_allow_purge, ///< Enable decommitting memory (=on) + mi_option_purge_delay, ///< Decommit page memory after N milli-seconds delay (25ms). + mi_option_segment_purge_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms).
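// [Editorial example, not part of the patch; values are illustrative] With the renamed
// options, purging is tuned through the regular option API or environment variables, e.g.:
//
//   mi_option_set(mi_option_purge_delay, 25);             // purge page memory after ~25ms
//   mi_option_set_enabled(mi_option_allow_purge, false);  // or turn purging off entirely
//
// (equivalently MIMALLOC_PURGE_DELAY=25 or MIMALLOC_ALLOW_PURGE=0); the MI_OPTION_LEGACY
// entries later in this series keep the old decommit-based names working.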
_mi_option_last } mi_option_t; diff --git a/include/mimalloc.h b/include/mimalloc.h index 23ac05b7..6ade2e96 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -324,27 +324,27 @@ typedef enum mi_option_e { // some of the following options are experimental // (deprecated options are kept for binary backward compatibility with v1.x versions) mi_option_eager_commit, - mi_option_deprecated_eager_region_commit, - mi_option_reset_decommits, + mi_option_eager_arena_commit, + mi_option_purge_decommits, mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup mi_option_deprecated_segment_cache, mi_option_page_reset, - mi_option_abandoned_page_decommit, + mi_option_abandoned_page_purge, mi_option_deprecated_segment_reset, mi_option_eager_commit_delay, - mi_option_decommit_delay, + mi_option_purge_delay, mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) mi_option_os_tag, mi_option_max_errors, mi_option_max_warnings, mi_option_max_segment_reclaim, - mi_option_allow_decommit, - mi_option_segment_decommit_delay, - mi_option_decommit_extend_delay, + mi_option_allow_purge, + mi_option_deprecated_segment_decommit_delay, + mi_option_purge_extend_delay, mi_option_destroy_on_exit, mi_option_arena_reserve, mi_option_arena_purge_delay, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2108a909..e97e7d91 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -93,6 +93,7 @@ size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); @@ -120,8 +121,8 @@ bool _mi_arena_is_os_allocated(size_t arena_memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_cache_free_all(mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index c7ddaaae..38b13883 100644 --- a/include/mimalloc/types.h +++ 
b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // 1MiB #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS @@ -379,9 +379,10 @@ typedef struct mi_segment_s { size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) - bool allow_decommit; - mi_msecs_t decommit_expire; - mi_commit_mask_t decommit_mask; + bool allow_decommit; + bool allow_purge; + mi_msecs_t purge_expire; + mi_commit_mask_t purge_mask; mi_commit_mask_t commit_mask; _Atomic(struct mi_segment_s*) abandoned_next; diff --git a/src/arena.c b/src/arena.c index c99267c8..1f0dd2f8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -297,7 +297,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? { mi_arena_id_t arena_id = 0; - const bool arena_commit = _mi_os_has_overcommit(); + const bool arena_commit = _mi_os_has_overcommit() || mi_option_is_enabled(mi_option_eager_arena_commit); if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; @@ -336,20 +336,6 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { Arena purge ----------------------------------------------------------- */ -// either resets or decommits memory, returns true if the memory was decommitted. -static bool mi_os_purge(void* p, size_t size, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits) && // should decommit? - !_mi_preloading()) // don't decommit during preloading (unsafe) - { - _mi_os_decommit(p, size, stats); - return true; // decommitted - } - else { - _mi_os_reset(p, size, stats); - return false; // not decommitted - } -} - // reset or decommit in an arena and update the committed/decommit bitmaps static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_committed != NULL); @@ -357,7 +343,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->allow_decommit); const size_t size = blocks * MI_ARENA_BLOCK_SIZE; void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); - const bool decommitted = mi_os_purge(p, size, stats); + const bool decommitted = _mi_os_purge(p, size, stats); // clear the purged blocks _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap @@ -369,6 +355,8 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, // Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. 
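// [Editorial sketch, not part of the patch] The scheduling policy introduced by this commit,
// restated in outline (helper names as in the surrounding diff; the body is abridged):
//
//   if (!mi_option_is_enabled(mi_option_allow_purge)) return;           // purging disabled
//   if (_mi_preloading() || mi_option_get(mi_option_arena_purge_delay) == 0) {
//     mi_arena_purge(arena, bitmap_idx, blocks, stats);                 // purge immediately
//   }
//   else {
//     // mark the blocks in arena->blocks_purge and let a later mi_arenas_try_purge()
//     // pass purge them once arena_purge_delay milliseconds have elapsed
//   }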
static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_purge != NULL); + if (!mi_option_is_enabled(mi_option_allow_purge)) return; + const long delay = mi_option_get(mi_option_arena_purge_delay); if (_mi_preloading() || delay == 0) { // decommit directly @@ -468,7 +456,8 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { const long delay = mi_option_get(mi_option_arena_purge_delay); - if (_mi_preloading() || delay == 0 /* || !mi_option_is_enabled(mi_option_allow_decommit) */) return; // nothing will be scheduled + if (_mi_preloading() || delay == 0 || !mi_option_is_enabled(mi_option_allow_purge)) return; // nothing will be scheduled + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); if (max_arena == 0) return; diff --git a/src/options.c b/src/options.c index 239ec308..44eee4b6 100644 --- a/src/options.c +++ b/src/options.c @@ -60,15 +60,15 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, + { 0, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates + { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge, abandoned_page_decommit) },// decommit free page memory when a thread terminates { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed @@ -77,23 +77,23 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds + { 10, UNINIT, MI_OPTION_LEGACY(purge_delay, decommit_delay) }, // page decommit delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. 
number of segment reclaims from the abandoned segments per try. - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 1, UNINIT, MI_OPTION_LEGACY(allow_purge, allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) + { 100, UNINIT, MI_OPTION(deprecated_segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments + { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! #if (MI_INTPTR_SIZE>4) { 1024L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time #else { 128L*1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 500, UNINIT, MI_OPTION(arena_purge_delay) } // reset/decommit delay in milli-seconds for arena allocation + { 100, UNINIT, MI_OPTION(arena_purge_delay) } // reset/decommit delay in milli-seconds for arena allocation }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 8bcdbf7b..20c6f8eb 100644 --- a/src/os.c +++ b/src/os.c @@ -436,6 +436,21 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat } */ +// either resets or decommits memory, returns true if the memory was decommitted. +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + _mi_os_decommit(p, size, stats); + return true; // decommitted + } + else { + _mi_os_reset(p, size, stats); + return false; // not decommitted + } +} + + // Protect a region in memory to be not accessible. static bool mi_os_protectx(void* addr, size_t size, bool protect) { // page align conservatively within the range diff --git a/src/region.c b/src/region.c index 809b9ec8..b01d4091 100644 --- a/src/region.c +++ b/src/region.c @@ -307,7 +307,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* mi_assert_internal(!info.x.is_large && !info.x.is_pinned); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); _mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed + if (*commit || !mi_option_is_enabled(mi_option_purge_decommits)) { // only if needed bool reset_zero = false; _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); if (reset_zero) *is_zero = true; @@ -415,7 +415,7 @@ void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, s // reset the blocks to reduce the working set. 
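// [Editorial note, not part of the patch] "Purge" is the policy switch implemented by the new
// _mi_os_purge shown above: with mi_option_purge_decommits enabled the range is decommitted and
// the call returns true, otherwise it is only reset (madvise-style, staying committed) and the
// call returns false. A typical caller therefore looks roughly like:
//
//   const bool decommitted = _mi_os_purge(p, size, stats);
//   if (decommitted) { /* also clear the committed bitmap / stats for this range */ }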
if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead + mi_option_is_enabled(mi_option_purge_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); @@ -467,7 +467,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { -----------------------------------------------------------------------------*/ bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { + if (mi_option_is_enabled(mi_option_purge_decommits)) { return _mi_os_decommit(p, size, tld->stats); } else { @@ -476,7 +476,7 @@ bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { + if (mi_option_is_enabled(mi_option_purge_decommits)) { return _mi_os_commit(p, size, is_zero, tld->stats); } else { diff --git a/src/segment-cache.c b/src/segment-cache.c index 58b98df3..a98e6b07 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -29,7 +29,7 @@ typedef struct mi_cache_slot_s { size_t memid; bool is_pinned; mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; _Atomic(mi_msecs_t) expire; } mi_cache_slot_t; @@ -48,7 +48,7 @@ static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void mi_decl_noinline static void* mi_segment_cache_pop_ex( bool all_suitable, size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool large_allowed, + mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { @@ -96,7 +96,7 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( *is_pinned = slot->is_pinned; *is_zero = false; *commit_mask = slot->commit_mask; - *decommit_mask = slot->decommit_mask; + *purge_mask = slot->purge_mask; slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); @@ -107,9 +107,9 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( } -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); + return mi_segment_cache_pop_ex(false, size, commit_mask, purge_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) @@ -142,7 +142,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_decommit)) 
return; + if (!mi_option_is_enabled(mi_option_allow_purge)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); @@ -170,7 +170,7 @@ static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, // decommit committed parts // TODO: instead of decommit, we could also free to the OS? mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); + mi_commit_mask_create_empty(&slot->purge_mask); } _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } @@ -191,7 +191,7 @@ void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { void _mi_segment_cache_free_all(mi_os_tld_t* tld) { mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; bool is_pinned; bool is_zero; bool is_large; @@ -200,7 +200,7 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { void* p; do { // keep popping and freeing the memory - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, + p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &purge_mask, true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); if (p != NULL) { size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); @@ -210,7 +210,7 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { } while (p != NULL); } -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return false; @@ -257,13 +257,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->commit_mask = *commit_mask; - slot->decommit_mask = *decommit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { - long delay = mi_option_get(mi_option_segment_decommit_delay); + slot->purge_mask = *purge_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_purge)) { + long delay = mi_option_get(mi_option_arena_purge_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); + mi_commit_mask_create_empty(&slot->purge_mask); } else { mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); diff --git a/src/segment.c b/src/segment.c index e4381e74..65b21d94 100644 --- a/src/segment.c +++ b/src/segment.c @@ -14,7 +14,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); +static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- @@ -258,7 +258,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -390,14 +390,14 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(end, os_pagesize); } - // purge delayed decommits now? (no, leave it to the cache) - // mi_segment_delayed_decommit(segment,true,tld->stats); + // purge delayed decommits now? (no, leave it to the arena) + // mi_segment_delayed_purge(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); #if MI_USE_SEGMENT_CACHE if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache - || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) + || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->purge_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) #endif { // if not all committed, an arena may decommit the whole area, but that double counts @@ -478,7 +478,7 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // commit liberal, but decommit conservative uint8_t* start = NULL; @@ -488,6 +488,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); @@ -496,41 +497,47 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s mi_commit_mask_set(&segment->commit_mask, &mask); } else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + // purging mi_assert_internal((void*)start != (void*)segment); - //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); - - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - 
_mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (segment->allow_decommit) { - _mi_os_decommit(start, full_size, stats); // ok if this fails - } - mi_commit_mask_clear(&segment->commit_mask, &mask); + if (mi_option_is_enabled(mi_option_allow_purge)) { + if (segment->allow_decommit) { + const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit + if (decommitted) { + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + } + else if (segment->allow_purge) { + _mi_os_reset(start, full_size, stats); + } + } } // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + if (commit && mi_commit_mask_any_set(&segment->purge_mask, &mask)) { + segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } - // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, &mask); + // always undo delayed purges + mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow - if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed + if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); return mi_segment_commitx(segment,true,p,size,stats); } -static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (!segment->allow_decommit) return; - if (mi_option_get(mi_option_decommit_delay) == 0) { +static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (!segment->allow_purge) return; + if (mi_option_get(mi_option_purge_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } else { - // register for future decommit in the decommit mask + // register for future purge in the purge mask uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; @@ -538,39 +545,39 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit - mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask)); mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more - mi_commit_mask_set(&segment->decommit_mask, &cmask); + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); 
// only purge what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); - if (segment->decommit_expire == 0) { + if (segment->purge_expire == 0) { // no previous decommits, initialize now - segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); + segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } - else if (segment->decommit_expire <= now) { + else if (segment->purge_expire <= now) { // previous decommit mask already expired - if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { - mi_segment_delayed_decommit(segment, true, stats); + if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { + mi_segment_delayed_purge(segment, true, stats); } else { - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else { // previous decommit mask is not yet expired, increase the expiration by a bit. - segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); + segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; +static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { + if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); - if (!force && now < segment->decommit_expire) return; + if (!force && now < segment->purge_expire) return; - mi_commit_mask_t mask = segment->decommit_mask; - segment->decommit_expire = 0; - mi_commit_mask_create_empty(&segment->decommit_mask); + mi_commit_mask_t mask = segment->purge_mask; + segment->purge_expire = 0; + mi_commit_mask_create_empty(&segment->purge_mask); size_t idx; size_t count; @@ -583,7 +590,7 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st } } mi_commit_mask_foreach_end() - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); } @@ -596,7 +603,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) { } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? 
NULL : mi_span_queue_for(slice_count,tld)); @@ -616,8 +623,8 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - if (allow_decommit) { - mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + if (allow_purge) { + mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); } // and push it on the free page queue (if it was not a huge page) @@ -794,7 +801,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id, size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, - mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask, + mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* ppurge_mask, bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -821,10 +828,10 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment #if MI_USE_SEGMENT_CACHE // get from cache? if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, ppurge_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } #else - MI_UNUSED(pdecommit_mask); + MI_UNUSED(ppurge_mask); #endif // get from OS @@ -886,13 +893,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi bool is_zero = false; mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; mi_commit_mask_create_empty(&commit_mask); - mi_commit_mask_create_empty(&decommit_mask); + mi_commit_mask_create_empty(&purge_mask); // Allocate the segment from the OS mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask, + &segment_slices, &pre_size, &info_slices, &commit_mask, &purge_mask, &is_zero, &commit, tld, os_tld); if (segment == NULL) return NULL; @@ -908,21 +915,22 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - if (segment->allow_decommit) { - segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - segment->decommit_mask = decommit_mask; - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; + segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; + if (segment->allow_purge) { + segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); + segment->purge_mask = purge_mask; + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); #if MI_DEBUG>2 const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); mi_commit_mask_t commit_needed_mask; 
mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); + mi_assert_internal(!mi_commit_mask_any_set(&segment->purge_mask, &commit_needed_mask)); #endif } else { - segment->decommit_expire = 0; - mi_commit_mask_create_empty( &segment->decommit_mask ); + segment->purge_expire = 0; + mi_commit_mask_create_empty( &segment->purge_mask ); } // initialize segment info @@ -965,7 +973,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } else { mi_assert_internal(huge_page!=NULL); - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance @@ -1269,8 +1277,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // perform delayed decommits - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); + // perform delayed decommits (forcing is much slower on mstress) + mi_segment_delayed_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1459,7 +1467,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again + mi_segment_delayed_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1483,9 +1491,9 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // otherwise, decommit if needed and push on the visited list - // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. - mi_segment_delayed_decommit(segment, force, tld->stats); + // otherwise, purge if needed and push on the visited list + // note: forced purge can be expensive if many threads are destroyed/created as in mstress. 
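// [Editorial note, not part of the patch] mi_segment_delayed_purge only acts once the segment's
// purge_expire deadline has passed, unless forced; in outline (mirroring the function shown
// earlier in this commit):
//
//   if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return;
//   if (!force && _mi_clock_now() < segment->purge_expire) return;
//   // otherwise decommit/reset the slices recorded in purge_mask
//
// which is why the abandon path above forces it only when abandoned_page_purge is enabled,
// while this collect path passes its own `force` flag.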
+ mi_segment_delayed_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } @@ -1543,7 +1551,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); + mi_segment_delayed_purge(_mi_ptr_segment(page), false, tld->stats); return page; } From a9f42376b793449396bc5e2d430f40153fecbebc Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 16:17:02 -0700 Subject: [PATCH 212/352] small changes; make minimal commit most fine grained --- include/mimalloc/types.h | 2 +- src/alloc-aligned.c | 4 ++-- src/init.c | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 38b13883..ebbea391 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // most fine-grained #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index e79a2220..59f5a524 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -146,10 +146,10 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } -mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if (!_mi_is_power_of_two(alignment)) return NULL; + if mi_unlikely(!_mi_is_power_of_two(alignment)) return NULL; if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments diff --git a/src/init.c b/src/init.c index 0b4325e3..9378d028 100644 --- a/src/init.c +++ b/src/init.c @@ -37,6 +37,7 @@ const mi_page_t _mi_page_empty = { #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) +#if (MI_SMALL_WSIZE_MAX==128) #if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #elif (MI_PADDING>0) @@ -44,7 +45,9 @@ const mi_page_t _mi_page_empty = { #else #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } #endif - +#else +#error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX" +#endif // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } From d22a13c990c8faf0031f7b463c02663bf9d96b8c Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 17:58:28 -0700 Subject: [PATCH 213/352] wip: purgeable arenas, various fixes --- include/mimalloc/types.h | 2 +- src/arena.c | 14 +++++++++++--- src/options.c | 2 +- src/prim/unix/prim.c | 16 ---------------- src/segment.c | 10 
+++++----- 5 files changed, 18 insertions(+), 26 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ebbea391..962535e3 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // most fine-grained +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS diff --git a/src/arena.c b/src/arena.c index 1f0dd2f8..57db9f7e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -182,7 +182,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); - if (commit_zero) *is_zero = true; + if (commit_zero) { *is_zero = true; } } } else { @@ -190,7 +190,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } - mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); + // mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); return p; } @@ -297,7 +297,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? { mi_arena_id_t arena_id = 0; - const bool arena_commit = _mi_os_has_overcommit() || mi_option_is_enabled(mi_option_eager_arena_commit); + + bool arena_commit = _mi_os_has_overcommit(); + if (mi_option_get(mi_option_eager_arena_commit) == 1) { arena_commit = true; } + else if (mi_option_get(mi_option_eager_arena_commit) == 0) { arena_commit = false; } + if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; @@ -513,6 +517,9 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, return; } + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) + mi_track_mem_undefined(p,size); + // potentially decommit if (!arena->allow_decommit || arena->blocks_committed == NULL) { mi_assert_internal(all_committed); @@ -523,6 +530,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, if (!all_committed) { // assume the entire range as no longer committed _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_track_mem_noaccess(p,size); } // (delay) purge the entire range mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); diff --git a/src/options.c b/src/options.c index 44eee4b6..ca8bf5d8 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
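// [Editorial note, not part of the patch] The arena.c change above makes eager_arena_commit a
// tri-state option (renamed to arena_eager_commit in the following commit): with the default of
// 2 in the table below it resolves to "auto", i.e. commit arena memory eagerly only when the OS
// overcommits:
//
//   bool arena_commit = _mi_os_has_overcommit();                          // auto (value 2)
//   if (mi_option_get(mi_option_eager_arena_commit) == 1)      { arena_commit = true;  }
//   else if (mi_option_get(mi_option_eager_arena_commit) == 0) { arena_commit = false; }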
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 2, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 09c76f90..8d027ebb 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -342,22 +342,6 @@ static void unix_mprotect_hint(int err) { int _mi_prim_commit(void* start, size_t size, bool commit) { - /* - #if 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) - if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } - } - #else - */ int err = 0; if (commit) { // commit: ensure we can access the area diff --git a/src/segment.c b/src/segment.c index 65b21d94..63e47742 100644 --- a/src/segment.c +++ b/src/segment.c @@ -756,7 +756,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(slice) + slice_count; size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + mi_segment_span_free(segment, next_index, next_count, false /* don't purge left-over part */, tld); slice->slice_count = (uint32_t)slice_count; } @@ -915,7 +915,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; + segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; if (segment->allow_purge) { segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); @@ -969,7 +969,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't purge */, tld); } else { mi_assert_internal(huge_page!=NULL); @@ -1585,7 +1585,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_assert_internal(psize - (aligned_p - start) >= size); uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_os_decommit(decommit_start, 
decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + _mi_os_reset(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } return page; @@ -1630,7 +1630,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc if (segment->allow_decommit) { const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + _mi_os_reset(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } } #endif From 33d7503fdb1b9fbe047756309455f4223eab55dd Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 19:57:26 -0700 Subject: [PATCH 214/352] rename to arena_eager_commit --- include/mimalloc.h | 2 +- src/arena.c | 4 ++-- src/options.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 6ade2e96..cb408acc 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -324,7 +324,7 @@ typedef enum mi_option_e { // some of the following options are experimental // (deprecated options are kept for binary backward compatibility with v1.x versions) mi_option_eager_commit, - mi_option_eager_arena_commit, + mi_option_arena_eager_commit, mi_option_purge_decommits, mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup diff --git a/src/arena.c b/src/arena.c index 57db9f7e..ca4c87a3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -299,8 +299,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_arena_id_t arena_id = 0; bool arena_commit = _mi_os_has_overcommit(); - if (mi_option_get(mi_option_eager_arena_commit) == 1) { arena_commit = true; } - else if (mi_option_get(mi_option_eager_arena_commit) == 0) { arena_commit = false; } + if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + else if (mi_option_get(mi_option_arena_eager_commit) == 0) { arena_commit = false; } if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); diff --git a/src/options.c b/src/options.c index ca8bf5d8..3eeccaae 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 2, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages From 09297ba8cf7d8dd8429440acfcf326754cc58a5a Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 11:46:02 -0700 Subject: [PATCH 215/352] wip: purgeable arenas --- src/arena.c | 7 ++-- src/options.c | 4 +- src/os.c | 5 ++- src/segment.c | 101 ++++++++++++++++++++++++++++---------------------- src/stats.c | 4 +- 5 files changed, 68 insertions(+), 53 deletions(-) diff --git a/src/arena.c b/src/arena.c index ca4c87a3..134a6227 100644 --- a/src/arena.c +++ b/src/arena.c @@ -163,7 +163,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren // none of the claimed blocks should be scheduled for a decommit if (arena->blocks_purge != NULL) { - // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `in_use`). + // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`). _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index); } @@ -176,7 +176,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren *commit = true; } else if (*commit) { - // arena not committed as a whole, but commit requested: ensure commit now + // commit requested, but the range may not be committed as a whole: ensure it is committed now bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { @@ -294,7 +294,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); if (arena_reserve > 0 && arena_reserve >= size && // eager reserve enabled and large enough? req_arena_id == _mi_arena_id_none() && // not exclusive? - mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? + mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) && // not too many arenas already? + !_mi_preloading() ) // and not before main runs { mi_arena_id_t arena_id = 0; diff --git a/src/options.c b/src/options.c index 3eeccaae..bb11b6a5 100644 --- a/src/options.c +++ b/src/options.c @@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, - { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N @@ -72,8 +72,6 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - #elif defined(_WIN32) - { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif diff --git a/src/os.c b/src/os.c index 20c6f8eb..f54e2513 100644 --- a/src/os.c +++ b/src/os.c @@ -437,7 +437,10 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat */ // either resets or decommits memory, returns true if the memory was decommitted. -bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) +{ + if (!mi_option_is_enabled(mi_option_allow_purge)) return false; + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? !_mi_preloading()) // don't decommit during preloading (unsafe) { diff --git a/src/segment.c b/src/segment.c index 63e47742..5a324adb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- @@ -391,7 +391,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { } // purge delayed decommits now? (no, leave it to the arena) - // mi_segment_delayed_purge(segment,true,tld->stats); + // mi_segment_try_purge(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); @@ -476,49 +476,32 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_commit_mask_create(bitidx, bitcount, cm); } - -static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { +static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); - // commit liberal, but decommit conservative + // commit liberal uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; - mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); - if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + mi_segment_commit_mask(segment, false /* conservative? 
*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size == 0) return true; - if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + if (!mi_commit_mask_all_set(&segment->commit_mask, &mask)) { // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - mi_commit_mask_set(&segment->commit_mask, &mask); + if (!_mi_os_commit(start, full_size, &is_zero, stats)) return false; + mi_commit_mask_set(&segment->commit_mask, &mask); } - else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { - // purging - mi_assert_internal((void*)start != (void*)segment); - if (mi_option_is_enabled(mi_option_allow_purge)) { - if (segment->allow_decommit) { - const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit - if (decommitted) { - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting - mi_commit_mask_clear(&segment->commit_mask, &mask); - } - } - else if (segment->allow_purge) { - _mi_os_reset(start, full_size, stats); - } - } - } - // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->purge_mask, &mask)) { + + // increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon. + if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) { segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } - // always undo delayed purges + + // always clear any delayed purges in our range (as they are either committed now) mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } @@ -528,13 +511,43 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - return mi_segment_commitx(segment,true,p,size,stats); + return mi_segment_commit(segment, p, size, stats); +} + +static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); + if (!segment->allow_purge) return true; + + // purge conservative + uint8_t* start = NULL; + size_t full_size = 0; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, true /* conservative? 
*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + + if (mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + // purging + mi_assert_internal((void*)start != (void*)segment); + mi_assert_internal(segment->allow_decommit); + const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit + if (decommitted) { + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + } + + // always clear any scheduled purges in our range + mi_commit_mask_clear(&segment->purge_mask, &mask); + return true; } static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_purge) return; + if (mi_option_get(mi_option_purge_delay) == 0) { - mi_segment_commitx(segment, false, p, size, stats); + mi_segment_purge(segment, p, size, stats); } else { // register for future purge in the purge mask @@ -551,26 +564,26 @@ static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); if (segment->purge_expire == 0) { - // no previous decommits, initialize now + // no previous purgess, initialize now segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } else if (segment->purge_expire <= now) { - // previous decommit mask already expired + // previous purge mask already expired if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { - mi_segment_delayed_purge(segment, true, stats); + mi_segment_try_purge(segment, true, stats); } else { segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else { - // previous decommit mask is not yet expired, increase the expiration by a bit. + // previous purge mask is not yet expired, increase the expiration by a bit. 
segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } -static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->purge_expire) return; @@ -586,7 +599,7 @@ static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); size_t size = count * MI_COMMIT_SIZE; - mi_segment_commitx(segment, false, p, size, stats); + mi_segment_purge(segment, p, size, stats); } } mi_commit_mask_foreach_end() @@ -916,7 +929,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; - segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; + segment->allow_purge = segment->allow_decommit && mi_option_is_enabled(mi_option_allow_purge); if (segment->allow_purge) { segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); segment->purge_mask = purge_mask; @@ -1278,7 +1291,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { } // perform delayed decommits (forcing is much slower on mstress) - mi_segment_delayed_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); + mi_segment_try_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1467,7 +1480,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again + mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1493,7 +1506,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) else { // otherwise, purge if needed and push on the visited list // note: forced purge can be expensive if many threads are destroyed/created as in mstress. 
- mi_segment_delayed_purge(segment, force, tld->stats); + mi_segment_try_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } @@ -1551,7 +1564,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_purge(_mi_ptr_segment(page), false, tld->stats); + mi_segment_try_purge(_mi_ptr_segment(page), false, tld->stats); return page; } diff --git a/src/stats.c b/src/stats.c index d2a31681..cc87513d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -111,7 +111,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); - + mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); @@ -331,7 +331,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); - mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); From 9f0da5c1951aec25c432dff013c16e4f09244efd Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 16:48:02 -0700 Subject: [PATCH 216/352] merge ide files --- ide/vs2022/mimalloc-override.vcxproj | 3 --- ide/vs2022/mimalloc.vcxproj | 3 --- 2 files changed, 6 deletions(-) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 5c0513c6..52ed5282 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -257,11 +257,8 @@ -<<<<<<< HEAD -======= ->>>>>>> dev-reset diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 36100a79..33a719c1 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -235,11 +235,8 @@ -<<<<<<< HEAD -======= ->>>>>>> dev-reset From 96b02dda1ff02db716b48d86e60fcf67f3593b45 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 8 Apr 2023 17:55:07 -0700 Subject: [PATCH 217/352] fix accidental cmake move --- cmake/JoinPaths.cmake | 23 +++++++++++++++++++++++ cmake/mimalloc-config-version.cmake | 19 +++++++++++++++++++ cmake/mimalloc-config.cmake | 14 ++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 cmake/JoinPaths.cmake create mode 100644 cmake/mimalloc-config-version.cmake create mode 100644 cmake/mimalloc-config.cmake diff --git a/cmake/JoinPaths.cmake b/cmake/JoinPaths.cmake new file mode 100644 index 00000000..c68d91b8 --- /dev/null +++ b/cmake/JoinPaths.cmake @@ -0,0 +1,23 @@ +# This module provides function for joining paths +# known from most languages +# +# SPDX-License-Identifier: (MIT OR CC0-1.0) +# Copyright 2020 Jan Tojnar +# https://github.com/jtojnar/cmake-snips +# +# Modelled after Python’s os.path.join +# 
https://docs.python.org/3.7/library/os.path.html#os.path.join +# Windows not supported +function(join_paths joined_path first_path_segment) + set(temp_path "${first_path_segment}") + foreach(current_segment IN LISTS ARGN) + if(NOT ("${current_segment}" STREQUAL "")) + if(IS_ABSOLUTE "${current_segment}") + set(temp_path "${current_segment}") + else() + set(temp_path "${temp_path}/${current_segment}") + endif() + endif() + endforeach() + set(${joined_path} "${temp_path}" PARENT_SCOPE) +endfunction() diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake new file mode 100644 index 00000000..a44c121d --- /dev/null +++ b/cmake/mimalloc-config-version.cmake @@ -0,0 +1,19 @@ +set(mi_version_major 2) +set(mi_version_minor 1) +set(mi_version_patch 2) +set(mi_version ${mi_version_major}.${mi_version_minor}) + +set(PACKAGE_VERSION ${mi_version}) +if(PACKAGE_FIND_VERSION_MAJOR) + if("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "${mi_version_major}") + if ("${PACKAGE_FIND_VERSION_MINOR}" EQUAL "${mi_version_minor}") + set(PACKAGE_VERSION_EXACT TRUE) + elseif("${PACKAGE_FIND_VERSION_MINOR}" LESS "${mi_version_minor}") + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_UNSUITABLE TRUE) + endif() + else() + set(PACKAGE_VERSION_UNSUITABLE TRUE) + endif() +endif() diff --git a/cmake/mimalloc-config.cmake b/cmake/mimalloc-config.cmake new file mode 100644 index 00000000..a49b02a2 --- /dev/null +++ b/cmake/mimalloc-config.cmake @@ -0,0 +1,14 @@ +include(${CMAKE_CURRENT_LIST_DIR}/mimalloc.cmake) +get_filename_component(MIMALLOC_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH) # one up from the cmake dir, e.g. /usr/local/lib/cmake/mimalloc-2.0 +get_filename_component(MIMALLOC_VERSION_DIR "${CMAKE_CURRENT_LIST_DIR}" NAME) +string(REPLACE "/lib/cmake" "/lib" MIMALLOC_LIBRARY_DIR "${MIMALLOC_CMAKE_DIR}") +if("${MIMALLOC_VERSION_DIR}" EQUAL "mimalloc") + # top level install + string(REPLACE "/lib/cmake" "/include" MIMALLOC_INCLUDE_DIR "${MIMALLOC_CMAKE_DIR}") + set(MIMALLOC_OBJECT_DIR "${MIMALLOC_LIBRARY_DIR}") +else() + # versioned + string(REPLACE "/lib/cmake/" "/include/" MIMALLOC_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}") + string(REPLACE "/lib/cmake/" "/lib/" MIMALLOC_OBJECT_DIR "${CMAKE_CURRENT_LIST_DIR}") +endif() +set(MIMALLOC_TARGET_DIR "${MIMALLOC_LIBRARY_DIR}") # legacy From e35e919ea43e63b29738ca94173b2252a7b3b6f1 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 13 Apr 2023 15:37:54 -0700 Subject: [PATCH 218/352] remove segment-cache as it is superseded by better arena management --- CMakeLists.txt | 1 - ide/vs2022/mimalloc-override.vcxproj | 1 - ide/vs2022/mimalloc.vcxproj | 1 - include/mimalloc/internal.h | 7 - src/heap.c | 4 - src/init.c | 1 - src/segment-cache.c | 277 --------------------------- src/segment.c | 36 +--- src/static.c | 1 - 9 files changed, 5 insertions(+), 324 deletions(-) delete mode 100644 src/segment-cache.c diff --git a/CMakeLists.txt b/CMakeLists.txt index de2689a3..2bcd1ef7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,6 @@ set(mi_sources src/page.c src/random.c src/segment.c - src/segment-cache.c src/segment-map.c src/stats.c src/prim/prim.c) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 52ed5282..e2c7f71d 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -257,7 +257,6 @@ - diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 33a719c1..2916483d 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ 
b/ide/vs2022/mimalloc.vcxproj @@ -235,7 +235,6 @@ - diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index ab8c0d28..f4a08a09 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -120,16 +120,9 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_o void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); -bool _mi_arena_memid_is_os_allocated(mi_memid_t memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); -// "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, mi_memid_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); -void _mi_segment_cache_free_all(mi_os_tld_t* tld); - // "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); diff --git a/src/heap.c b/src/heap.c index 9238812b..14c3d66c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -163,10 +163,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_segment_thread_collect(&heap->tld->segments); } - // decommit in global segment caches - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment - _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); - // collect regions on program-exit (or shared library unload) if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { _mi_arena_collect(false /* destroy arenas */, true /* force purge */, &heap->tld->stats); diff --git a/src/init.c b/src/init.c index 5fb1ae43..17dc2faf 100644 --- a/src/init.c +++ b/src/init.c @@ -632,7 +632,6 @@ static void mi_cdecl mi_process_done(void) { // or C-runtime termination code. if (mi_option_is_enabled(mi_option_destroy_on_exit)) { _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments _mi_arena_collect(true /* destroy (owned) arenas */, true /* purge the rest */, &_mi_heap_main_get()->tld->stats); } diff --git a/src/segment-cache.c b/src/segment-cache.c deleted file mode 100644 index 2aee27c6..00000000 --- a/src/segment-cache.c +++ /dev/null @@ -1,277 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- - Implements a cache of segments to avoid expensive OS calls and to reuse - the commit_mask to optimize the commit/decommit calls. - The full memory map of all segments is also implemented here. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "mimalloc/atomic.h" - -#include "./bitmap.h" // atomic bitmap - -// #define MI_CACHE_DISABLE 1 // define to completely disable the segment cache - -#define MI_CACHE_FIELDS (16) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit - -#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) -#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes - -typedef struct mi_cache_slot_s { - void* p; - mi_memid_t memid; - bool is_pinned; - mi_commit_mask_t commit_mask; - mi_commit_mask_t purge_mask; - _Atomic(mi_msecs_t) expire; -} mi_cache_slot_t; - -static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 - -static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! -static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; -static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free - -static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { - mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); -} - -mi_decl_noinline static void* mi_segment_cache_pop_ex( - bool all_suitable, - size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* purge_mask, bool large_allowed, - bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t _req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return NULL; -#else - - // only segment blocks - if (size != MI_SEGMENT_SIZE) return NULL; - - // numa node determines start field - const int numa_node = _mi_os_numa_node(tld); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot and make it unavailable - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - mi_arena_id_t req_arena_id = _req_arena_id; - mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - - if (large_allowed) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // no longer available but still in-use - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_pinned = slot->is_pinned; - *is_zero = false; - *commit_mask = slot->commit_mask; - *purge_mask = slot->purge_mask; - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - - // mark the slot as free again - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -#endif -} - - -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) -{ - return mi_segment_cache_pop_ex(false, size, commit_mask, purge_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) -{ - if (mi_commit_mask_is_empty(cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(cmask)) { - // decommit the whole in one call - _mi_os_decommit(p, total, stats); - } - else { - // decommit parts - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - size_t idx; - size_t count; - mi_commit_mask_foreach(cmask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - mi_commit_mask_create_empty(cmask); -} - -#define MI_MAX_PURGE_PER_PUSH (4) - -static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) -{ - MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_purge)) return; - mi_msecs_t now = _mi_clock_now(); - size_t purged = 0; - const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (visit_all ? 
0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); - for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways - // it was available, we claimed it (and made it unavailable) - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); - // we can now access it safely - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // safe read - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - // TODO: instead of decommit, we could also free to the OS? - mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->purge_mask); - } - _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push - } - } -} - -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - if (force) { - // called on `mi_collect(true)` but not on thread termination - _mi_segment_cache_free_all(tld); - } - else { - mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); - } -} - -void _mi_segment_cache_free_all(mi_os_tld_t* tld) { - mi_commit_mask_t commit_mask; - mi_commit_mask_t purge_mask; - bool is_pinned; - bool is_zero; - bool is_large; - mi_memid_t memid; - const size_t size = MI_SEGMENT_SIZE; - void* p; - do { - // keep popping and freeing the memory - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &purge_mask, - true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); -} - -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, mi_memid_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return false; -#else - - // purge expired entries - mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - - // only cache normal segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // Also do not cache arena allocated segments that cannot be decommitted. 
(as arena allocation is fast) - // This is a common case with reserved huge OS pages. - // - // (note: we could also allow segments that are already fully decommitted but that never happens - // as the first slice is always committed (for the segment metadata)) - if (!_mi_arena_memid_is_os_allocated(memid) && is_pinned) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); -#if MI_DEBUG>1 - if (is_pinned || is_large) { - mi_assert_internal(mi_commit_mask_is_full(commit_mask)); - } -#endif - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - slot->is_pinned = is_pinned; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = *commit_mask; - slot->purge_mask = *purge_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_purge)) { - long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->purge_mask); - } - else { - mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx); - return true; -#endif -} diff --git a/src/segment.c b/src/segment.c index af4ed95c..7d0d2c28 100644 --- a/src/segment.c +++ b/src/segment.c @@ -11,7 +11,6 @@ terms of the MIT license. A copy of the license can be found in the file #include // memset #include -#define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); @@ -393,28 +392,11 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // purge delayed decommits now? (no, leave it to the arena) // mi_segment_try_purge(segment,true,tld->stats); - // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); -#if MI_USE_SEGMENT_CACHE - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache - || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->purge_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) -#endif - { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - /* - // if not all committed, an arena may decommit the whole area, but that double counts - // the already decommitted parts; adjust for that in the stats. 
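For context, a minimal usage sketch of what this patch implies for callers — hedged: it assumes only the public `mimalloc.h` API (`mi_option_set`, `mi_malloc`, `mi_free`, `mi_collect`, `mi_stats_print`), none of it introduced by this patch. With the segment cache removed, a freed segment is returned directly to the arena, and its memory is purged either after roughly `mi_option_purge_delay` milliseconds or immediately on a forced collect:

  #include <mimalloc.h>

  int main(void) {
    mi_option_set(mi_option_purge_delay, 10); // purge freed memory ~10ms after free
    void* p = mi_malloc(8 * 1024 * 1024);     // large allocation, served via a segment/arena block
    mi_free(p);                               // the segment returns to the arena (no segment cache anymore)
    mi_collect(true);                         // force: run any delayed purges now
    mi_stats_print(NULL);                     // inspect commit/purge statistics
    return 0;
  }

This sketch only illustrates the intended behavior; the exact purge timing depends on the option defaults in this version.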
- if (!mi_commit_mask_is_full(&segment->commit_mask)) { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - mi_assert_internal(size > csize); - if (size > csize) { - _mi_stat_increase(&_mi_stats_main.committed, size - csize); - } - } - */ - _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, csize, tld->stats); - } + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, csize, tld->stats); } // called by threads that are terminating @@ -819,6 +801,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + MI_UNUSED(ppurge_mask); mi_memid_t memid; bool mem_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; @@ -837,15 +820,6 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment } const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; - - #if MI_USE_SEGMENT_CACHE - // get from cache? - if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, ppurge_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); - } - #else - MI_UNUSED(ppurge_mask); - #endif // get from OS if (segment==NULL) { diff --git a/src/static.c b/src/static.c index 831e9ecd..bc05dd72 100644 --- a/src/static.c +++ b/src/static.c @@ -32,7 +32,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "page.c" // includes page-queue.c #include "random.c" #include "segment.c" -#include "segment-cache.c" #include "segment-map.c" #include "stats.c" #include "prim/prim.c" From 66aa7a17ace23315f2b1e44c9b9cfd874ac67af1 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 09:13:08 -0700 Subject: [PATCH 219/352] further fix for atomic build error suggested by Davide Di Gennaro (issue #729, pr #724) --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 1951b470..130ef820 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -281,7 +281,7 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t; static inline bool mi_atomic_once( mi_atomic_once_t* once ) { if (mi_atomic_load_relaxed(once) != 0) return false; // quick test uintptr_t expected = 0; - return mi_atomic_cas_strong_acq_rel(once, &expected, 1UL); // try to set to 1 + return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 } // Yield From 6d42f2ac390e70424ffd402449a50c74079fd455 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 11:35:51 -0700 Subject: [PATCH 220/352] increase pipeline timeout to 10min for tsan --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c0f7ec74..1804be26 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -121,7 +121,7 @@ jobs: cmakeArgs: .. 
$(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 300 + - script: ctest --verbose --timeout 600 workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) From 015aac05a5938ec82a45e5fe101de2adb6f7f6a8 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 12:24:51 -0700 Subject: [PATCH 221/352] keep tsan enabled for dev-slice --- azure-pipelines.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cb9b08fe..0247c76f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -108,12 +108,11 @@ jobs: CXX: clang++ BuildType: debug-ubsan-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_DEBUG_UBSAN=ON - # Disable for now as it times out on the azure build machines - # Debug TSAN Clang++: - # CC: clang - # CXX: clang++ - # BuildType: debug-tsan-clang-cxx - # cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON + Debug TSAN Clang++: + CC: clang + CXX: clang++ + BuildType: debug-tsan-clang-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON steps: - task: CMake@1 From 95c2059e89b75da2507184123a9aee15921c0788 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 19:48:49 -0700 Subject: [PATCH 222/352] fix asan mem tracking --- src/segment.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/segment.c b/src/segment.c index 442b187a..a80c36ac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -877,18 +877,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi &segment_slices, &pre_size, &info_slices, commit, tld, os_tld); if (segment == NULL) return NULL; - // zero the segment info? -- not always needed as it may be zero initialized from the OS - ptrdiff_t ofs = offsetof(mi_segment_t, next); - size_t prefix = offsetof(mi_segment_t, slices) - ofs; - size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more + // zero the segment info? -- not always needed as it may be zero initialized from the OS if (!segment->memid.was_zero) { + ptrdiff_t ofs = offsetof(mi_segment_t, next); + size_t prefix = offsetof(mi_segment_t, slices) - ofs; + size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more _mi_memzero((uint8_t*)segment + ofs, zsize); } - else { - mi_track_mem_defined((uint8_t*)segment + ofs,zsize); - mi_assert(mi_mem_is_zero((uint8_t*)segment + ofs, zsize)); - } - // initialize the rest of the segment info const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? 
MI_SLICES_PER_SEGMENT : segment_slices); From 5c9013302107d1f5f2f087f43975b49f0cd764e4 Mon Sep 17 00:00:00 2001 From: "microsoft-github-policy-service[bot]" <77245923+microsoft-github-policy-service[bot]@users.noreply.github.com> Date: Fri, 2 Jun 2023 17:40:26 +0000 Subject: [PATCH 223/352] Microsoft mandatory file --- SECURITY.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..e138ec5d --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
+ + From 280123bd5c84e25579172e5a86c4285b92347d65 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 1 Mar 2024 15:25:57 -0800 Subject: [PATCH 224/352] purge on page free as well --- src/segment.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/segment.c b/src/segment.c index eca50d46..8e14ac07 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1026,6 +1026,10 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // only abandoned pages; remove from free list and abandon mi_segment_abandon(segment,tld); } + else { + // perform delayed purges + mi_segment_try_purge(segment, false /* force? */, tld->stats); + } } @@ -1239,17 +1243,17 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld) { - *reclaimed = false; + *reclaimed = false; mi_segment_t* segment; mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap,¤t); - + // limit the tries to 10% (default) of the abandoned segments with at least 8 tries, and at most 1024. const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100); if (perc <= 0) return NULL; const size_t abandoned_count = _mi_arena_segment_abandoned_count(); const size_t relative_count = (abandoned_count > 10000 ? (abandoned_count / 100) * perc : (abandoned_count * perc) / 100); // avoid overflow long max_tries = (long)(relative_count < 8 ? 8 : (relative_count > 1024 ? 1024 : relative_count)); - while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) + while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) { segment->abandoned_visits++; // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments @@ -1288,7 +1292,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) { mi_segment_t* segment; mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, ¤t); - int max_tries = (force ? 16*1024 : 1024); // limit latency + int max_tries = (force ? 16*1024 : 1024); // limit latency while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) { mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) if (segment->used == 0) { From 5a2ed6d97762758ad4c349c110b263d474070dd9 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 2 Mar 2024 18:27:06 -0800 Subject: [PATCH 225/352] fix assertion --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 5c40c00e..7ada9b80 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1166,7 +1166,6 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s // Reclaim an abandoned segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { - mi_assert_expensive(mi_segment_is_valid(segment, tld)); if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } // can be 0 still with abandoned_next, or already a thread id for segments outside an arena that are reclaimed on a free. 
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id()); @@ -1216,6 +1215,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } mi_assert(segment->abandoned == 0); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); if (segment->used == 0) { // due to page_clear mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed)); mi_segment_free(segment, false, tld); From af3f2f9168fc0f2345c23d8c8b34a73563935834 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 22:32:32 -0700 Subject: [PATCH 226/352] fix assertions and warnings on gcc --- src/page-queue.c | 2 +- src/page.c | 4 ++-- src/segment.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/page-queue.c b/src/page-queue.c index 751caf95..470d1b64 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -141,7 +141,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* } #endif -static inline bool mi_page_is_large_or_huge(mi_page_t* page) { +static inline bool mi_page_is_large_or_huge(const mi_page_t* page) { return (mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_huge(page)); } diff --git a/src/page.c b/src/page.c index 808d863c..6bd53296 100644 --- a/src/page.c +++ b/src/page.c @@ -428,7 +428,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } -#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE +#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE #define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks @@ -455,7 +455,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = 1+(bsize <= MI_SMALL_OBJ_SIZE_MAX ? 
MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; diff --git a/src/segment.c b/src/segment.c index 2d29a5f3..e7843f37 100644 --- a/src/segment.c +++ b/src/segment.c @@ -601,7 +601,7 @@ static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* ----------------------------------------------------------- */ static bool mi_segment_is_abandoned(mi_segment_t* segment) { - return (segment->thread_id == 0); + return (mi_atomic_load_relaxed(&segment->thread_id) == 0); } // note: can be called on abandoned segments @@ -654,8 +654,8 @@ static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld // note: can be called on abandoned segments static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0); - mi_segment_t* segment = _mi_ptr_segment(slice); - bool is_abandoned = mi_segment_is_abandoned(segment); + mi_segment_t* const segment = _mi_ptr_segment(slice); + const bool is_abandoned = (segment->thread_id == 0); // mi_segment_is_abandoned(segment); // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { From 7c17c3d33ed03a5cc19144cb99e3a8030b1c7cdf Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 22:41:33 -0700 Subject: [PATCH 227/352] optimize page struct layout --- include/mimalloc/types.h | 7 ++++--- src/init.c | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index b8e7f97d..a08555ee 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -292,14 +292,15 @@ typedef struct mi_page_s { // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory - uint16_t used; // number of blocks in use (including blocks in `thread_free`) mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized uint8_t retire_expire:7; // expiration count for retired blocks - // padding + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + uint16_t used; // number of blocks in use (including blocks in `thread_free`) + uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) + // padding size_t block_size; // size available in each block (always `>0`) uint8_t* page_start; // start of the page area containing the blocks diff --git a/src/init.c b/src/init.c index 1a6a30e5..33161062 100644 --- a/src/init.c +++ b/src/init.c @@ -14,17 +14,17 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, + 0, false, false, false, false, 0, // capacity 0, // reserved capacity - 0, // used { 0 }, // flags - 0, // block size shift false, // is_zero 0, // retire_expire NULL, // free NULL, // local_free + 0, // used + 0, // block size shift 0, // block_size NULL, // page_start #if (MI_PADDING || MI_ENCODE_FREELIST) From cc8d89a08528500572390a648a874bc705bd91b2 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 25 Mar 2024 07:35:49 -0700 Subject: [PATCH 228/352] update comments --- include/mimalloc/types.h | 57 ++++++++++++++++++++++++---------------- src/page.c | 2 +- src/segment.c | 20 +++++++------- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index a08555ee..4e96c5ec 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -13,9 +13,12 @@ terms of the MIT license. A copy of the license can be found in the file // mi_heap_t : all data for a thread-local heap, contains // lists of all managed heap pages. // mi_segment_t : a larger chunk of memory (32GiB) from where pages -// are allocated. -// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from +// are allocated. A segment is divided in slices (64KiB) from +// which pages are allocated. +// mi_page_t : a "mimalloc" page (usually 64KiB or 512KiB) from // where objects are allocated. +// Note: we always explicitly use "OS page" to refer to OS pages +// and just use "page" to refer to mimalloc pages (`mi_page_t`) // -------------------------------------------------------------------------- @@ -192,15 +195,15 @@ typedef int32_t mi_ssize_t; #error "mimalloc internal: define more bins" #endif -// Maximum slice offset (15) -#define MI_MAX_SLICE_OFFSET ((MI_BLOCK_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) - // blocks up to this size are always allocated aligned #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments #define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) +// Maximum slice count (255) for which we can find the page for interior pointers +#define MI_MAX_SLICE_OFFSET_COUNT ((MI_BLOCK_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) + // ------------------------------------------------------ // Mimalloc pages contain allocated blocks @@ -285,9 +288,9 @@ typedef struct mi_page_s { // "owned" by the segment uint32_t slice_count; // slices in this page (0 if not a page) uint32_t slice_offset; // distance from the actual page data slice (0 if a page) - uint8_t is_committed : 1; // `true` if the page virtual memory is committed - uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized - uint8_t is_huge:1; // `true` if the page is in a huge segment + uint8_t is_committed:1; // `true` if the page virtual memory is committed + uint8_t is_zero_init:1; // `true` if the page was initially zero initialized + uint8_t is_huge:1; // `true` if the page is in a huge segment (`segment->kind == MI_SEGMENT_HUGE`) // padding // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` @@ -328,12 +331,13 @@ typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment MI_PAGE_LARGE, // larger blocks go into a 
page of just one block - MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment. + MI_PAGE_HUGE, // huge blocks (> `MI_LARGE_OBJ_SIZE_MAX) or with alignment `> MI_BLOCK_ALIGNMENT_MAX` + // are put into a single page in a single `MI_SEGMENT_HUGE` segment. } mi_page_kind_t; typedef enum mi_segment_kind_e { MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. - MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. + MI_SEGMENT_HUGE, // segment with just one huge page inside. } mi_segment_kind_t; // ------------------------------------------------------ @@ -404,39 +408,48 @@ typedef struct mi_memid_s { } mi_memid_t; -// Segments are large allocated memory blocks (8mb on 64 bit) from -// the OS. Inside segments we allocated fixed size _pages_ that -// contain blocks. +// Segments are large allocated memory blocks (8mb on 64 bit) from arenas or the OS. +// +// Inside segments we allocated fixed size mimalloc pages (`mi_page_t`) that contain blocks. +// The start of a segment is this structure with a fixed number of slice entries (`slices`) +// usually followed by a guard OS page and the actual allocation area with pages. +// While a page is not allocated, we view it's data as a `mi_slice_t` (instead of a `mi_page_t`). +// Of any free area, the first slice has the info and `slice_offset == 0`; for any subsequent +// slices part of the area, the `slice_offset` is the byte offset back to the first slice +// (so we can quickly find the page info on a free, `internal.h:_mi_segment_page_of`). +// For slices, the `block_size` field is repurposed to signify if a slice is used (`1`) or not (`0`). +// Small and medium pages use a fixed amount of slices to reduce slice fragmentation, while +// large and huge pages span a variable amount of slices. typedef struct mi_segment_s { // constant fields - mi_memid_t memid; // memory id for arena allocation - bool allow_decommit; - bool allow_purge; + mi_memid_t memid; // memory id for arena/OS allocation + bool allow_decommit; // can we decommmit the memory + bool allow_purge; // can we purge the memory (reset or decommit) size_t segment_size; // segment fields - mi_msecs_t purge_expire; - mi_commit_mask_t purge_mask; - mi_commit_mask_t commit_mask; + mi_msecs_t purge_expire; // purge slices in the `purge_mask` after this time + mi_commit_mask_t purge_mask; // slices that can be purged + mi_commit_mask_t commit_mask; // slices that are currently committed // from here is zero initialized struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation) size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t abandoned_visits; // count how often this segment is visited during abondoned reclamation (to force reclaim if it takes too long) size_t used; // count of pages in use uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` - size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. + size_t segment_info_slices; // initial count of slices that we are using for segment info and possible guard pages. 
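  // A minimal sketch of the slice invariants described in the comment above (illustrative
  // only; `slice`, `first` and `in_use` are hypothetical names, not part of this patch).
  // An interior slice stores the byte offset back to the first slice of its span, and the
  // repurposed `block_size` field tells whether that span is a used page or a free span:
  //   mi_slice_t* first = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); // first slice of the span
  //   bool in_use = (first->block_size > 0);                                    // 0 = free span, > 0 = page in use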
// layout like this to optimize access in `mi_free` mi_segment_kind_t kind; size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment - mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one extra final entry for huge blocks with large alignment } mi_segment_t; diff --git a/src/page.c b/src/page.c index 6bd53296..05de541a 100644 --- a/src/page.c +++ b/src/page.c @@ -455,7 +455,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 1+(bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; diff --git a/src/segment.c b/src/segment.c index e7843f37..1d2f1e47 100644 --- a/src/segment.c +++ b/src/segment.c @@ -11,7 +11,11 @@ terms of the MIT license. A copy of the license can be found in the file #include // memset #include -#define MI_PAGE_HUGE_ALIGN (256*1024) +// ------------------------------------------------------------------- +// Segments +// mimalloc pages reside in segments. See `mi_segment_valid` for invariants. +// ------------------------------------------------------------------- + static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); @@ -146,10 +150,6 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) { /* -------------------------------------------------------------------------------- Segment allocation - - If a thread ends, it "abandons" pages with used blocks - and there is an abandoned segment list whose segments can - be reclaimed by still running threads, much like work-stealing. -------------------------------------------------------------------------------- */ @@ -268,10 +268,10 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_offset == 0); size_t index = mi_slice_index(slice); size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? 
segment->slice_entries : index + slice->slice_count) - 1; - if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets + if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET_COUNT valid back offsets used_count++; - if (segment->kind == MI_SEGMENT_HUGE) { mi_assert_internal(slice->is_huge); } - for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) { + mi_assert_internal(slice->is_huge == (segment->kind == MI_SEGMENT_HUGE)); + for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET_COUNT && index + i <= maxindex; i++) { mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t)); mi_assert_internal(i==0 || segment->slices[index + i].slice_count == 0); mi_assert_internal(i==0 || segment->slices[index + i].block_size == 1); @@ -720,9 +720,9 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i mi_page_t* page = mi_slice_to_page(slice); mi_assert_internal(mi_page_block_size(page) == bsize); - // set slice back pointers for the first MI_MAX_SLICE_OFFSET entries + // set slice back pointers for the first MI_MAX_SLICE_OFFSET_COUNT entries size_t extra = slice_count-1; - if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; + if (extra > MI_MAX_SLICE_OFFSET_COUNT) extra = MI_MAX_SLICE_OFFSET_COUNT; if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices mi_slice_t* slice_next = slice + 1; From a7c033caedef7af5e9d6f9a2318e2b8171c18215 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 19 Apr 2024 10:14:27 -0700 Subject: [PATCH 229/352] avoid unused warning --- src/alloc-aligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 0495c11d..b7e589ea 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -159,7 +159,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, // ensure a definition is emitted #if defined(__cplusplus) -static void* _mi_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned; +void* _mi_extern_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned; #endif // ------------------------------------------------------ From 6c5d6e1f721cd3eb369b93e4a1931d180a55a873 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 May 2024 17:24:52 -0700 Subject: [PATCH 230/352] fix max allocation size on 32-bit systems (issue #882) --- include/mimalloc/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 97438569..e2b9ce38 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -214,7 +214,7 @@ typedef int32_t mi_ssize_t; // we never allocate more than PTRDIFF_MAX (see also ) // on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. 
(issue #877) -#if PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX) +#if (PTRDIFF_MAX > INT32_MAX) && (PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX)) #define MI_MAX_ALLOC_SIZE (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1)) #else #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX From d824b9db2b339650b4dd04ffae5ede8abd84889c Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 13 May 2024 10:11:57 -0700 Subject: [PATCH 231/352] fix page collection where a freed segment could be accessed --- src/heap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/heap.c b/src/heap.c index 2fb04f7a..6c56edd6 100644 --- a/src/heap.c +++ b/src/heap.c @@ -95,6 +95,11 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= MI_FORCE); + if (collect == MI_FORCE) { + // note: call before a potential `_mi_page_free` as the segment may be freed if this was the last used page in that segment. + mi_segment_t* segment = _mi_page_segment(page); + _mi_segment_collect(segment, true /* force? */, &heap->tld->segments); + } if (mi_page_all_free(page)) { // no more used blocks, free the page. // note: this will free retired pages as well. @@ -104,10 +109,6 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t // still used blocks but the thread is done; abandon the page _mi_page_abandon(page, pq); } - if (collect == MI_FORCE) { - mi_segment_t* segment = _mi_page_segment(page); - _mi_segment_collect(segment, true /* force? */, &heap->tld->segments); - } return true; // don't break } From 44b65b19df9107c45147df31fcafedf135619411 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 16 May 2024 13:30:33 -0700 Subject: [PATCH 232/352] remove pre_size parameter for slices --- src/segment.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/segment.c b/src/segment.c index 9ac22f15..9e1b39a2 100644 --- a/src/segment.c +++ b/src/segment.c @@ -347,7 +347,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } -static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) { +static size_t mi_segment_calculate_slices(size_t required, size_t* info_slices) { size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; @@ -361,7 +361,6 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, siz } } - if (pre_size != NULL) *pre_size = isize; isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; size_t segment_size = (required==0 ? 
MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); @@ -808,7 +807,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren ----------------------------------------------------------- */ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id, - size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, + size_t* psegment_slices, size_t* pinfo_slices, bool commit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -825,7 +824,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN ); const size_t extra = align_offset - info_size; // recalculate due to potential guard pages - *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); + *psegment_slices = mi_segment_calculate_slices(required + extra, pinfo_slices); mi_assert_internal(*psegment_slices > 0 && *psegment_slices <= UINT32_MAX); } @@ -874,8 +873,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // calculate needed sizes first size_t info_slices; - size_t pre_size; - size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); + size_t segment_slices = mi_segment_calculate_slices(required, &info_slices); mi_assert_internal(segment_slices > 0 && segment_slices <= UINT32_MAX); // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) @@ -887,7 +885,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // Allocate the segment from the OS mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, commit, tld, os_tld); + &segment_slices, &info_slices, commit, tld, os_tld); if (segment == NULL) return NULL; // zero the segment info? -- not always needed as it may be zero initialized from the OS @@ -915,8 +913,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data, and at the end of the segment. - size_t os_pagesize = _mi_os_page_size(); - mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size); + size_t os_pagesize = _mi_os_page_size(); _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; mi_segment_ensure_committed(segment, end, os_pagesize, tld->stats); From 4a26a4568e0f593b7842d91fbf4ec5f80d06bc65 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 16 May 2024 14:26:05 -0700 Subject: [PATCH 233/352] fix out-of-bounds write on span free in huge segments --- src/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 9e1b39a2..6044c270 100644 --- a/src/segment.c +++ b/src/segment.c @@ -623,7 +623,9 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size mi_assert_internal(slice->slice_count == slice_count); // no overflow? 
slice->slice_offset = 0; if (slice_count > 1) { - mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + mi_slice_t* last = slice + slice_count - 1; + mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment); + if (last > end) { last = end; } last->slice_count = 0; last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); last->block_size = 0; From 3c5e480ce73b02cd8bd8eca0846b2baf930c265d Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Jun 2024 16:21:46 -0700 Subject: [PATCH 234/352] fix alignment test --- src/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 869e05a8..bea43210 100644 --- a/src/segment.c +++ b/src/segment.c @@ -332,6 +332,8 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c if (block_size <= 64) { start_offset += 3*block_size; } else if (block_size <= 512) { start_offset += block_size; } } + mi_assert_internal(_mi_is_aligned(pstart + start_offset, MI_MAX_ALIGN_SIZE)); + mi_assert_internal(block_size == 0 || block_size > MI_MAX_ALIGN_GUARANTEE || _mi_is_aligned(pstart + start_offset,block_size)); if (page_size != NULL) { *page_size = psize - start_offset; } return (pstart + start_offset); } @@ -360,8 +362,6 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* info_slices) required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size; } } - mi_assert_internal(_mi_is_aligned(p, MI_MAX_ALIGN_SIZE)); - mi_assert_internal(block_size > MI_MAX_ALIGN_GUARANTEE || _mi_is_aligned(p,block_size)); isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; From 01503df7f3bb9bc46c74d67bc5060552f9f66ded Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 12 Aug 2024 13:51:39 -0700 Subject: [PATCH 235/352] move declaration to avoid gcc warning, see issue #919 --- src/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index bea43210..f8e98655 100644 --- a/src/segment.c +++ b/src/segment.c @@ -663,8 +663,7 @@ static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0); mi_segment_t* const segment = _mi_ptr_segment(slice); - const bool is_abandoned = (segment->thread_id == 0); // mi_segment_is_abandoned(segment); - + // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case) @@ -676,6 +675,7 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } // otherwise coalesce the span and add to the free span queues + const bool is_abandoned = (segment->thread_id == 0); // mi_segment_is_abandoned(segment); size_t slice_count = slice->slice_count; mi_slice_t* next = slice + slice->slice_count; mi_assert_internal(next <= mi_segment_slices_end(segment)); From f163164d364557e341b8e32684502fcb0ee60b58 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 20 Aug 2024 13:12:51 -0700 Subject: [PATCH 236/352] ensure start-offset in a segment respects minimal alignment --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index f8e98655..1906e8ac 100644 --- a/src/segment.c +++ b/src/segment.c @@ 
-332,6 +332,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c if (block_size <= 64) { start_offset += 3*block_size; } else if (block_size <= 512) { start_offset += block_size; } } + start_offset = _mi_align_up(start_offset, MI_MAX_ALIGN_SIZE); mi_assert_internal(_mi_is_aligned(pstart + start_offset, MI_MAX_ALIGN_SIZE)); mi_assert_internal(block_size == 0 || block_size > MI_MAX_ALIGN_GUARANTEE || _mi_is_aligned(pstart + start_offset,block_size)); if (page_size != NULL) { *page_size = psize - start_offset; } From ad02086d3b45de030680b895762fa8a018edd07e Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 Aug 2024 17:07:01 -0700 Subject: [PATCH 237/352] remove default MI_DEBUG_GUARDED --- include/mimalloc/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 4540998f..69f737b3 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -75,7 +75,7 @@ terms of the MIT license. A copy of the license can be found in the file // Use guard pages behind objects of a certain size (set by the MIMALLOC_DEBUG_GUARDED_MIN/MAX options) // Padding should be disabled when using guard pages -#define MI_DEBUG_GUARDED 1 +// #define MI_DEBUG_GUARDED 1 #if defined(MI_DEBUG_GUARDED) #define MI_PADDING 0 #endif From 723869014ff71b12c585bf9b9b51ee4128d1b71f Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 9 Oct 2024 21:24:20 -0700 Subject: [PATCH 238/352] add ability to abandon segments after a threshold --- include/mimalloc.h | 1 + include/mimalloc/internal.h | 2 + include/mimalloc/types.h | 2 +- src/arena-abandon.c | 2 +- src/options.c | 1 + src/page.c | 21 ++++++++++ src/segment.c | 83 ++++++++++++++++++++++++++++++++++++- 7 files changed, 108 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index a5b3cc9d..df85a2c0 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -369,6 +369,7 @@ typedef enum mi_option_e { mi_option_visit_abandoned, // allow visiting heap blocks from abandoned threads (=0) mi_option_debug_guarded_min, // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects (=0) mi_option_debug_guarded_max, // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects (=0) + mi_option_target_segments_per_thread, // experimental (=0) _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index aff6a1bd..b4e74789 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -175,6 +175,8 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; / void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... 
+void _mi_page_force_abandon(mi_page_t* page); + void _mi_heap_delayed_free_all(mi_heap_t* heap); bool _mi_heap_delayed_free_partial(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 69f737b3..044d6eae 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -200,7 +200,7 @@ typedef int32_t mi_ssize_t; #define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 8KiB on 64-bit #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 16MiB on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // Maximum number of size classes. (spaced exponentially in 12.5% increments) diff --git a/src/arena-abandon.c b/src/arena-abandon.c index eaa8c7c9..84b9f72c 100644 --- a/src/arena-abandon.c +++ b/src/arena-abandon.c @@ -192,7 +192,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool vi else { // otherwise visit all starting at a random location if (abandoned_count > abandoned_list_count && max_arena > 0) { - current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->start = 0; // (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); current->end = current->start + max_arena; } else { diff --git a/src/options.c b/src/options.c index 1cfb2f17..c97b9abe 100644 --- a/src/options.c +++ b/src/options.c @@ -100,6 +100,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 0, UNINIT, MI_OPTION(debug_guarded_min) }, // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects { 0, UNINIT, MI_OPTION(debug_guarded_max) }, // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects + { 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable. 
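  // Usage sketch for the new option (illustrative, not part of this patch): it can be set
  // programmatically with the existing `mi_option_set` API, or - assuming the usual
  // MIMALLOC_ environment prefix for option names - before startup:
  //   mi_option_set(mi_option_target_segments_per_thread, 8); // start abandoning segments above ~8 per thread
  //   // or: MIMALLOC_TARGET_SEGMENTS_PER_THREAD=8 ./myprogram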
}; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/page.c b/src/page.c index 99ef3835..5671c7d4 100644 --- a/src/page.c +++ b/src/page.c @@ -405,6 +405,27 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { } +// force abandon a page; this is safe to call +void _mi_page_force_abandon(mi_page_t* page) { + mi_heap_t* heap = mi_page_heap(page); + // mark page as not using delayed free + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + + // ensure this page is no longer in the heap delayed free list + _mi_heap_delayed_free_all(heap); + if (page->block_size == 0) return; // it may have been freed now + + // and now unlink it from the page queue and abandon (or free) + mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); + if (mi_page_all_free(page)) { + _mi_page_free(page, pq, false); + } + else { + _mi_page_abandon(page, pq); + } +} + + // Free a page with no more free blocks void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_internal(page != NULL); diff --git a/src/segment.c b/src/segment.c index 1f1dc006..bb7483f1 100644 --- a/src/segment.c +++ b/src/segment.c @@ -693,6 +693,8 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // free previous slice -- remove it from free and merge mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); slice_count += prev->slice_count; + slice->slice_count = 0; + slice->slice_offset = (uint32_t)((uint8_t*)slice - (uint8_t*)prev); // set the slice offset for `segment_force_abandon` (in case the previous free block is very large). if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); } slice = prev; } @@ -1329,7 +1331,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice result = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); break; } - else if (segment->abandoned_visits > 3 && is_suitable) { + else if (segment->abandoned_visits > 3 && is_suitable && !mi_option_is_enabled(mi_option_target_segments_per_thread)) { // always reclaim on 3rd visit to limit the abandoned queue length. 
mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1343,7 +1345,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice return result; } - +// collect abandoned segments void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) { mi_segment_t* segment; @@ -1367,6 +1369,80 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) _mi_arena_field_cursor_done(¤t); } +/* ----------------------------------------------------------- + Force abandon a segment that is in use by our thread +----------------------------------------------------------- */ + +// force abandon a segment +static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) +{ + mi_assert_internal(!mi_segment_is_abandoned(segment)); + + // for all slices + const mi_slice_t* end; + mi_slice_t* slice = mi_slices_start_iterate(segment, &end); + while (slice < end) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (mi_slice_is_used(slice)) { + // ensure used count is up to date and collect potential concurrent frees + mi_page_t* const page = mi_slice_to_page(slice); + _mi_page_free_collect(page, false); + { + // abandon the page if it is still in-use (this will free it if possible as well) + mi_assert_internal(segment->used > 0); + if (segment->used == segment->abandoned+1) { + // the last page.. abandon and return as the segment will be abandoned after this + // and we should no longer access it. + _mi_page_force_abandon(page); + return; + } + else { + // abandon and continue + _mi_page_force_abandon(page); + // it might be freed, reset the slice (note: relies on coalesce setting the slice_offset) + slice = mi_slice_first(slice); + } + } + } + slice = slice + slice->slice_count; + } + mi_assert(segment->used == segment->abandoned); + mi_assert(segment->used == 0); + if (segment->used == 0) { + // all free now + mi_segment_free(segment, false, tld); + } + else { + // perform delayed purges + mi_segment_try_purge(segment, false /* force? */, tld->stats); + } +} + + +// try abandon segments. +// this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. +static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { + const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); + if (target == 0 || tld->count <= target) return; + + const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% + + // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages + for (int i = 0; i < 16 && tld->count >= min_target; i++) { + mi_page_t* page = heap->pages[MI_BIN_FULL].first; + while (page != NULL && mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX) { + page = page->next; + } + if (page==NULL) { + break; + } + mi_segment_t* segment = _mi_page_segment(page); + mi_segment_force_abandon(segment, tld); + mi_assert_internal(page != heap->pages[MI_BIN_FULL].first); // as it is just abandoned + } +} + /* ----------------------------------------------------------- Reclaim or allocate ----------------------------------------------------------- */ @@ -1375,6 +1451,9 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_ { mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); + // try to abandon some segments to increase reuse between threads + mi_segments_try_abandon(heap,tld); + // 1. 
try to reclaim an abandoned segment bool reclaimed; mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld); From 19ce2c6461ffa63583f57c2558e9c7f9979dadaa Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 11 Oct 2024 10:44:43 -0700 Subject: [PATCH 239/352] restore randomization when trying to reclaim abandoned segments --- src/arena-abandon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena-abandon.c b/src/arena-abandon.c index 84b9f72c..eaa8c7c9 100644 --- a/src/arena-abandon.c +++ b/src/arena-abandon.c @@ -192,7 +192,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool vi else { // otherwise visit all starting at a random location if (abandoned_count > abandoned_list_count && max_arena > 0) { - current->start = 0; // (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); current->end = current->start + max_arena; } else { From 81da26d7d30c87bc0f094c91fbbae39513d2d35a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 11 Oct 2024 10:52:35 -0700 Subject: [PATCH 240/352] make target test for stealing one less since we are about to reclaim_or_alloc a fresh segment --- src/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index bb7483f1..3d411f9c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1424,9 +1424,10 @@ static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* t // this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); - if (target == 0 || tld->count <= target) return; + // we call this when we are about to add a fresh segment so we should be under our target segment count. + if (target == 0 || tld->count < target) return; - const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% + const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages for (int i = 0; i < 16 && tld->count >= min_target; i++) { From eda16d7c918b3f172de95bf0453edde6d249a321 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 29 Oct 2024 20:07:35 -0700 Subject: [PATCH 241/352] remove wrong assertion --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index c55e63b1..ed1cf921 100644 --- a/src/options.c +++ b/src/options.c @@ -63,6 +63,7 @@ typedef struct mi_option_desc_s { #define MI_DEFAULT_ARENA_EAGER_COMMIT 2 #endif +// in KiB #ifndef MI_DEFAULT_ARENA_RESERVE #if (MI_INTPTR_SIZE>4) #define MI_DEFAULT_ARENA_RESERVE 1024L*1024L @@ -197,7 +198,6 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma } mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { - mi_assert_internal(mi_option_has_size_in_kib(option)); const long x = mi_option_get(option); size_t size = (x < 0 ? 
0 : (size_t)x); if (mi_option_has_size_in_kib(option)) { From 4f46cf7d5a0f7cbd30d0048babd3e67a4226ee53 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 29 Oct 2024 22:40:58 -0700 Subject: [PATCH 242/352] ensure we dont reclaim a segment on a free if that would go above the target segment count --- src/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/segment.c b/src/segment.c index 3d411f9c..66ac4bf7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1261,6 +1261,8 @@ bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) { if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned if (segment->subproc != heap->tld->segments.subproc) return false; // only reclaim within the same subprocess if (!_mi_heap_memid_is_suitable(heap,segment->memid)) return false; // don't reclaim between exclusive and non-exclusive arena's + const long target = _mi_option_get_fast(mi_option_target_segments_per_thread); + if (target > 0 && (size_t)target <= heap->tld->segments.count) return false; // don't reclaim if going above the target count // don't reclaim more from a `free` call than half the current segments // this is to prevent a pure free-ing thread to start owning too many segments // (but not for out-of-arena segments as that is the main way to be reclaimed for those) From 826425d5ab84f93dc8970ecea9f942d0e32689a0 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 2 Nov 2024 06:24:28 -0700 Subject: [PATCH 243/352] fix merge error, issue #955 --- test/main-override-static.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/main-override-static.c b/test/main-override-static.c index 07af1090..b2b6ee20 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -20,12 +20,9 @@ static void test_reserved(void); static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); -<<<<<<< HEAD static void test_heap_arena(void); static void test_align(void); -======= static void test_canary_leak(void); ->>>>>>> dev // static void test_large_pages(void); int main() { From c58990d4eb93ebe05699cc5e6fa1697a050213aa Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 22 Nov 2024 13:55:10 -0800 Subject: [PATCH 244/352] fix syntax error (issue #963) --- test/main-override-static.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/main-override-static.c b/test/main-override-static.c index b2b6ee20..ccaba543 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -252,6 +252,8 @@ static void test_heap_arena(void) { break; } } +} + static void test_canary_leak(void) { char* p = mi_mallocn_tp(char,23); for(int i = 0; i < 23; i++) { From 9c5c628f990735ffc2f626b1cb6d8f26cf8c4701 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 20 Dec 2024 12:58:46 -0800 Subject: [PATCH 245/352] merge from dev --- ide/vs2022/mimalloc-test.vcxproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj index a8b36d5e..6e4576fd 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -272,14 +272,14 @@ Console + + + {abb5eae7-b3e6-432e-b636-333449892ea6} - - - From 34cdf1a49f092b90b76bbc6a71cb743e1f1985c6 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 25 Dec 2024 13:56:38 -0800 Subject: [PATCH 246/352] fix eager delayed setting --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index a93ea218..1390e77a 100644 --- 
a/src/segment.c +++ b/src/segment.c @@ -886,7 +886,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay = (// !_mi_os_has_overcommit() && // never delay on overcommit systems _mi_current_thread_count() > 1 && // do not delay for the first N threads - tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); From 17dd7e1901e850229aaf131e26f21b99ab49714a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 3 Jan 2025 18:45:00 -0800 Subject: [PATCH 247/352] bump version to 2.1.9 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 3d94fafc..f3ed36ab 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 1) -set(mi_version_patch 8) +set(mi_version_patch 9) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index fc6c75fc..bd91db43 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 218 // major + 2 digits minor +#define MI_MALLOC_VERSION 219 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 34b5d3c7792ec42260a197a595e3bb6ba3344c00 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 Jan 2025 09:53:11 -0800 Subject: [PATCH 248/352] update vcpkg hash --- contrib/vcpkg/portfile.cmake | 4 ++-- contrib/vcpkg/readme.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index 058ce985..faa4c542 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -5,11 +5,11 @@ vcpkg_from_github( # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). # REF "v${VERSION}" - REF be05b232e8a51e076aae6d8f4a5c3049ce51cb01 + REF 191ea046e4213e1a59652b3f4975219115ce1bed # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) 
- SHA512 24f640db050d6263e557fe9d024e6c0435762118605c0d04801efbcb32e96382b0b995000715fc0c2dcd67c67825a100a6690ecf0ef097b0a3ae107a82d74f7d + SHA512 d35926d368eb89b1688fafe22192c44e349ae78553a3a8def78bca847adff8a29e388a92027f03bd4fb6d6b4c906c70e9fd962bd539fbc8ef383fac95f64d8cd ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS diff --git a/contrib/vcpkg/readme.md b/contrib/vcpkg/readme.md index b1f6047c..014f2867 100644 --- a/contrib/vcpkg/readme.md +++ b/contrib/vcpkg/readme.md @@ -9,7 +9,7 @@ to check out a specific commit, version, or branch of mimalloc, or set further o You can install such custom port as: ```sh -$ vcpkg install mimalloc[override] --recurse --overlay-ports=./contrib/vcpkg +$ vcpkg install "mimalloc[override]" --recurse --overlay-ports=./contrib/vcpkg ``` This will also show the correct sha512 hash if you use a custom version. From e2db21e9ba9fb9172b7b0aa0fe9b8742525e8774 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 13 Jan 2025 16:55:56 -0800 Subject: [PATCH 249/352] remove INTERFACE_INCLUDE_DIRECTORIES --- contrib/vcpkg/portfile.cmake | 4 ++-- contrib/vcpkg/vcpkg-cmake-wrapper.cmake | 1 - contrib/vcpkg/vcpkg.json | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index faa4c542..ca746763 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -5,11 +5,11 @@ vcpkg_from_github( # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). # REF "v${VERSION}" - REF 191ea046e4213e1a59652b3f4975219115ce1bed + REF 03e501bddbf99a7a0688437172d914d079bc445a # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) 
- SHA512 d35926d368eb89b1688fafe22192c44e349ae78553a3a8def78bca847adff8a29e388a92027f03bd4fb6d6b4c906c70e9fd962bd539fbc8ef383fac95f64d8cd + SHA512 77bc7459baf517d8facfa7b3165709e392066091a47a5fa60498e6d9f3dcb308bc047fa743849a40803264c2f2f6e4c19e8e3ae04689f98a816bd1d0eed79ede ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS diff --git a/contrib/vcpkg/vcpkg-cmake-wrapper.cmake b/contrib/vcpkg/vcpkg-cmake-wrapper.cmake index 6b917347..1b355722 100644 --- a/contrib/vcpkg/vcpkg-cmake-wrapper.cmake +++ b/contrib/vcpkg/vcpkg-cmake-wrapper.cmake @@ -17,5 +17,4 @@ endif() if(TARGET mimalloc-static AND NOT TARGET mimalloc) add_library(mimalloc INTERFACE IMPORTED) set_target_properties(mimalloc PROPERTIES INTERFACE_LINK_LIBRARIES mimalloc-static) - set_target_properties(mimalloc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES mimalloc-static) endif() diff --git a/contrib/vcpkg/vcpkg.json b/contrib/vcpkg/vcpkg.json index bdbe9ba1..9adfc4f8 100644 --- a/contrib/vcpkg/vcpkg.json +++ b/contrib/vcpkg/vcpkg.json @@ -1,7 +1,7 @@ { "name": "mimalloc", - "version": "1.9.2", - "port-version": 2, + "version": "2.2.2", + "port-version": 1, "description": "Compact general purpose allocator with excellent performance", "homepage": "https://github.com/microsoft/mimalloc", "license": "MIT", From 0ef19762fec360e87d63ce91751cd3778d10ff05 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 13 Jan 2025 16:57:17 -0800 Subject: [PATCH 250/352] bump vcpkg sha --- contrib/vcpkg/portfile.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index ca746763..9d1c2cd9 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -5,11 +5,11 @@ vcpkg_from_github( # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). # REF "v${VERSION}" - REF 03e501bddbf99a7a0688437172d914d079bc445a + REF e2db21e9ba9fb9172b7b0aa0fe9b8742525e8774 # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) 
- SHA512 77bc7459baf517d8facfa7b3165709e392066091a47a5fa60498e6d9f3dcb308bc047fa743849a40803264c2f2f6e4c19e8e3ae04689f98a816bd1d0eed79ede + SHA512 8cbb601fdf8b46dd6a9c0d314d6da9d4960699853829e96d2470753867f90689fb4caeaf30d628943fd388670dc11902dbecc9cc7c329b99a510524a09bdb612 ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS From a24d71f37418c709dc0f3bdaaab06e53a5d4ca1c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 2 Mar 2025 17:10:24 -0800 Subject: [PATCH 251/352] fix compile warning --- src/page.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index 5b9e7c40..8db2463f 100644 --- a/src/page.c +++ b/src/page.c @@ -943,8 +943,8 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t } else */ { - mi_heap_stat_increase(heap, malloc_huge, bsize); - mi_heap_stat_counter_increase(heap, malloc_huge_count, 1); + _mi_stat_increase(&heap->tld->stats.malloc_huge, bsize); + _mi_stat_counter_increase(&heap->tld->stats.malloc_huge_count, 1); } } return page; From 2fc6b14bab0090eb84dd1f7a3f24d1e67918e3ff Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 6 Mar 2025 21:03:51 -0800 Subject: [PATCH 252/352] bump version to 1.9.3 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 7f3bd631..aeea621f 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 9) -set(mi_version_patch 2) +set(mi_version_patch 3) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4e9c3156..8ccfcec3 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 192 // major + 2 digits minor +#define MI_MALLOC_VERSION 193 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 2b895f4e97aa089db3e9012708a5c26492cce88e Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 6 Mar 2025 21:04:32 -0800 Subject: [PATCH 253/352] bump version to 2.2.3 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 286ec0ba..daac7a5d 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 2) -set(mi_version_patch 2) +set(mi_version_patch 3) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index ae6ae262..ff6f0568 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 222 // major + 2 digits minor +#define MI_MALLOC_VERSION 223 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 4aae566191b9443d53995245b637ce28d617710a Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 10 Mar 2025 12:17:46 -0700 Subject: [PATCH 254/352] fix link error with msvc in C mode (issue #1030) --- ide/vs2022/mimalloc-lib.vcxproj | 2 +- include/mimalloc/atomic.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ide/vs2022/mimalloc-lib.vcxproj b/ide/vs2022/mimalloc-lib.vcxproj index abdac1d1..95b516ec 100644 --- a/ide/vs2022/mimalloc-lib.vcxproj +++ b/ide/vs2022/mimalloc-lib.vcxproj @@ -308,7 +308,7 @@ false false Default - CompileAsCpp + CompileAsC true stdcpp20 diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 6eaa6f99..2984f50f 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -266,6 +266,13 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int6 return current; #endif } +static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) { + const int64_t add = *padd; + if (add != 0) { + mi_atomic_addi64_relaxed((volatile _Atomic(int64_t)*)p, add); + } +} + static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { From f11732acdfe3e33f64f4aa3e7db657ffd80dea8f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 10 Mar 2025 12:39:09 -0700 Subject: [PATCH 255/352] set default compilation to c++ mode on msvc --- ide/vs2022/mimalloc-lib.vcxproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ide/vs2022/mimalloc-lib.vcxproj b/ide/vs2022/mimalloc-lib.vcxproj index 95b516ec..abdac1d1 100644 --- a/ide/vs2022/mimalloc-lib.vcxproj +++ b/ide/vs2022/mimalloc-lib.vcxproj @@ -308,7 +308,7 @@ false false Default - CompileAsC + CompileAsCpp true stdcpp20 From 9a35bca55645a131092a91797f851794423175f6 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 16:12:17 -0700 Subject: [PATCH 256/352] possible fix for wrong accounting of committed bytes (issue #1035) --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9d40a271..1f6f6d9d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -648,15 +648,16 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); + const bool decommitted_size = (committed_size <= size ? size - committed_size : 0); // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) 
mi_track_mem_undefined(p,size); if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through - if (!all_committed && committed_size > 0) { - // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) - _mi_stat_decrease(&_mi_stats_main.committed, committed_size); + if (!all_committed && decommitted_size > 0) { + // if partially committed, adjust the committed stats (as `_mi_os_free` will decrease commit by the full size) + _mi_stat_increase(&_mi_stats_main.committed, decommitted_size); } _mi_os_free(p, size, memid); } @@ -695,7 +696,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_track_mem_noaccess(p,size); if (committed_size > 0) { // if partially committed, adjust the committed stats (is it will be recommitted when re-using) - // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. + // in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed. _mi_stat_decrease(&_mi_stats_main.committed, committed_size); } // note: if not all committed, it may be that the purge will reset/decommit the entire range From 26fa8be42759ac39f7b4869b4e0936bd35a8be17 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 18:50:53 -0700 Subject: [PATCH 257/352] improved accounting of committed bytes (issue #1035) --- include/mimalloc/internal.h | 16 +++++++++++ src/arena.c | 39 ++++++++++++++++--------- src/bitmap.c | 28 +++++++++++------- src/bitmap.h | 4 +-- src/libc.c | 57 +++++++++++++++++++++++++++++++++++++ src/stats.c | 1 + 6 files changed, 119 insertions(+), 26 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 106da0d1..5b3e7e23 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -127,6 +127,7 @@ bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); bool _mi_os_commit(void* p, size_t size, bool* is_zero); +bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); @@ -947,6 +948,21 @@ static inline size_t mi_bsr(size_t x) { return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x)); } +size_t _mi_popcount_generic(size_t x); + +static inline size_t mi_popcount(size_t x) { + if (x<=1) return x; + if (x==SIZE_MAX) return MI_SIZE_BITS; + #if defined(__GNUC__) + #if (SIZE_MAX == ULONG_MAX) + return __builtin_popcountl(x); + #else + return __builtin_popcountll(x); + #endif + #else + return _mi_popcount_generic(x); + #endif +} // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. diff --git a/src/arena.c b/src/arena.c index 1f6f6d9d..a7c20764 100644 --- a/src/arena.c +++ b/src/arena.c @@ -255,7 +255,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar // set the dirty bits (todo: no need for an atomic op here?) 
if (arena->memid.initially_zero && arena->blocks_dirty != NULL) { - memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL); } // set commit state @@ -267,10 +267,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; bool any_uncommitted; - _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + size_t already_committed = 0; + _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed); if (any_uncommitted) { + mi_assert_internal(already_committed < needed_bcount); + const size_t commit_size = mi_arena_block_size(needed_bcount); + const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed); bool commit_zero = false; - if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) { + if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) { memid->initially_committed = false; } else { @@ -280,7 +284,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar } else { // no need to commit, but check if already fully committed - memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + size_t already_committed = 0; + memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed); + if (!memid->initially_committed && already_committed > 0) { + // partially committed: as it will be committed at some time, adjust the stats and pretend the range is fully uncommitted. + mi_assert_internal(already_committed < needed_bcount); + _mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed)); + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + } } return p; @@ -464,17 +475,19 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks) const size_t size = mi_arena_block_size(blocks); void* const p = mi_arena_block_start(arena, bitmap_idx); bool needs_recommit; - if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { + size_t already_committed = 0; + if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) { // all blocks are committed, we can purge freely + mi_assert_internal(already_committed == blocks); needs_recommit = _mi_os_purge(p, size); } else { // some blocks are not committed -- this can happen when a partially committed block is freed // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge - // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), - // and also undo the decommit stats (as it was already adjusted) + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory). 
+ mi_assert_internal(already_committed < blocks); mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed)); } // clear the purged blocks @@ -508,7 +521,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t else { // already an expiration was set } - _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); + _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL); } } @@ -648,7 +661,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); - const bool decommitted_size = (committed_size <= size ? size - committed_size : 0); + const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0); // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) mi_track_mem_undefined(p,size); @@ -691,14 +704,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_assert_internal(arena->blocks_purge != NULL); if (!all_committed) { - // mark the entire range as no longer committed (so we recommit the full range when re-using) + // mark the entire range as no longer committed (so we will recommit the full range when re-using) _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); mi_track_mem_noaccess(p,size); - if (committed_size > 0) { + //if (committed_size > 0) { // if partially committed, adjust the committed stats (is it will be recommitted when re-using) // in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed. _mi_stat_decrease(&_mi_stats_main.committed, committed_size); - } + //} // note: if not all committed, it may be that the purge will reset/decommit the entire range // that contains already decommitted parts. Since purge consistently uses reset or decommit that // works (as we should never reset decommitted parts). diff --git a/src/bitmap.c b/src/bitmap.c index 9ef784d6..50f4df2b 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -351,7 +351,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
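The `_across` helpers that follow operate on a run of bits that may straddle several bitmap fields, which is why they are driven by a pre/mid/post mask split. The self-contained sketch below shows one way such masks can be computed for 64-bit fields; it is an illustration only and does not reproduce mimalloc's `mi_bitmap_mask_across` exactly.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_BITS 64

// Compute the three masks for a run of `count` bits starting at absolute bit
// index `start`: a partial mask in the first field, a full mask repeated over
// any whole middle fields, and a partial mask in the last field.
static void mask_across(size_t start, size_t count,
                        uint64_t* pre, uint64_t* mid, uint64_t* post,
                        size_t* mid_fields) {
  size_t bit = start % FIELD_BITS;
  size_t in_first = FIELD_BITS - bit;
  if (count <= in_first) {               // the whole run fits in one field
    *pre = ((count == FIELD_BITS) ? ~UINT64_C(0) : ((UINT64_C(1) << count) - 1)) << bit;
    *mid = 0; *post = 0; *mid_fields = 0;
    return;
  }
  *pre = (~UINT64_C(0)) << bit;          // upper part of the first field
  size_t rest = count - in_first;
  *mid_fields = rest / FIELD_BITS;       // number of completely covered fields
  *mid = ~UINT64_C(0);
  size_t tail = rest % FIELD_BITS;
  *post = (tail == 0 ? 0 : (UINT64_C(1) << tail) - 1);
}

int main(void) {
  uint64_t pre, mid, post; size_t nmid;
  mask_across(60, 72, &pre, &mid, &post, &nmid);   // bits 60..131
  printf("pre=%016llx mid=%016llx x%zu post=%016llx\n",
         (unsigned long long)pre, (unsigned long long)mid, nmid,
         (unsigned long long)post);
  return 0;
}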
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; @@ -359,28 +359,31 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_zero = true; bool any_zero = false; + size_t one_count = 0; _Atomic(size_t)*field = &bitmap[idx]; size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); - if ((prev & pre_mask) != 0) all_zero = false; + if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); } if ((prev & pre_mask) != pre_mask) any_zero = true; while (mid_count-- > 0) { prev = mi_atomic_or_acq_rel(field++, mid_mask); - if ((prev & mid_mask) != 0) all_zero = false; + if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); } if ((prev & mid_mask) != mid_mask) any_zero = true; } if (post_mask!=0) { prev = mi_atomic_or_acq_rel(field, post_mask); - if ((prev & post_mask) != 0) all_zero = false; + if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); } if ((prev & post_mask) != post_mask) any_zero = true; } if (pany_zero != NULL) { *pany_zero = any_zero; } + if (already_set != NULL) { *already_set = one_count; }; + mi_assert_internal(all_zero ? one_count == 0 : one_count <= count); return all_zero; } // Returns `true` if all `count` bits were 1. // `any_ones` is `true` if there was at least one bit set to one. -static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { +static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; @@ -388,30 +391,33 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_ones = true; bool any_ones = false; + size_t one_count = 0; mi_bitmap_field_t* field = &bitmap[idx]; size_t prev = mi_atomic_load_relaxed(field++); if ((prev & pre_mask) != pre_mask) all_ones = false; - if ((prev & pre_mask) != 0) any_ones = true; + if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); } while (mid_count-- > 0) { prev = mi_atomic_load_relaxed(field++); if ((prev & mid_mask) != mid_mask) all_ones = false; - if ((prev & mid_mask) != 0) any_ones = true; + if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); } } if (post_mask!=0) { prev = mi_atomic_load_relaxed(field); if ((prev & post_mask) != post_mask) all_ones = false; - if ((prev & post_mask) != 0) any_ones = true; + if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); } } if (pany_ones != NULL) { *pany_ones = any_ones; } + if (already_set != NULL) { *already_set = one_count; } + mi_assert_internal(all_ones ? 
one_count == count : one_count < count); return all_ones; } -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) { + return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set); } bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { bool any_ones; - mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL); return any_ones; } diff --git a/src/bitmap.h b/src/bitmap.h index d60668cb..60b38815 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -102,9 +102,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. -bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set); -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set); bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); #endif diff --git a/src/libc.c b/src/libc.c index 1bd97aa3..52d095eb 100644 --- a/src/libc.c +++ b/src/libc.c @@ -275,3 +275,60 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) { va_end(args); return written; } + + +#if MI_SIZE_SIZE == 4 +#define mi_mask_even_bits32 (0x55555555) +#define mi_mask_even_pairs32 (0x33333333) +#define mi_mask_even_nibbles32 (0x0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! +static size_t mi_byte_sum32(uint32_t x) { + // perform `x * 0x01010101`: the highest byte contains the sum of all bytes. + x += (x << 8); + x += (x << 16); + return (size_t)(x >> 24); +} + +static size_t mi_popcount_generic32(uint32_t x) { + // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10 + // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair + // into the lower bit-pair: + x = x - ((x >> 1) & mi_mask_even_bits32); + // add the 2-bit pair results + x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32); + // add the 4-bit nibble results + x = (x + (x >> 4)) & mi_mask_even_nibbles32; + // each byte now has a count of its bits, we can sum them now: + return mi_byte_sum32(x); +} + +mi_decl_noinline size_t _mi_popcount_generic(size_t x) { + return mi_popcount_generic32(x); +} + +#else +#define mi_mask_even_bits64 (0x5555555555555555) +#define mi_mask_even_pairs64 (0x3333333333333333) +#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! 
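The generic popcount added in this patch is the classic SWAR bit count: fold each 2-bit pair, then each nibble, then sum the bytes with a shift-and-add cascade. A small hedged harness like the one below can cross-check such an implementation against a naive loop; `swar_popcount64` is a local restatement of the algorithm for the test, not the library's `_mi_popcount_generic`.

#include <stdint.h>
#include <stdio.h>

// SWAR popcount, same algorithm as the generic fallback in this patch.
static unsigned swar_popcount64(uint64_t x) {
  x = x - ((x >> 1) & 0x5555555555555555ULL);                          // 2-bit sums
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); // 4-bit sums
  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                          // byte sums
  x += (x << 8); x += (x << 16); x += (x << 32);                       // add all bytes
  return (unsigned)(x >> 56);                                          // sum < 256, fits the top byte
}

// Obviously-correct reference: count bits one at a time.
static unsigned naive_popcount64(uint64_t x) {
  unsigned n = 0;
  for (; x != 0; x >>= 1) n += (unsigned)(x & 1);
  return n;
}

int main(void) {
  uint64_t v = 0x123456789ABCDEF0ULL;
  for (int i = 0; i < 1000; i++) {
    if (swar_popcount64(v) != naive_popcount64(v)) {
      printf("mismatch at %016llx\n", (unsigned long long)v);
      return 1;
    }
    v = v * 6364136223846793005ULL + 1442695040888963407ULL;   // LCG step to vary input
  }
  puts("SWAR popcount matches the naive count on all sampled values");
  return 0;
}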
+static size_t mi_byte_sum64(uint64_t x) { + x += (x << 8); + x += (x << 16); + x += (x << 32); + return (size_t)(x >> 56); +} + +static size_t mi_popcount_generic64(uint64_t x) { + x = x - ((x >> 1) & mi_mask_even_bits64); + x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64); + x = (x + (x >> 4)) & mi_mask_even_nibbles64; + return mi_byte_sum64(x); +} + +mi_decl_noinline size_t _mi_popcount_generic(size_t x) { + return mi_popcount_generic64(x); +} +#endif + diff --git a/src/stats.c b/src/stats.c index 1cfc3104..6a480816 100644 --- a/src/stats.c +++ b/src/stats.c @@ -30,6 +30,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { { // add atomically (for abandoned pages) int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + // if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); }; mi_atomic_maxi64_relaxed(&stat->peak, current + amount); if (amount > 0) { mi_atomic_addi64_relaxed(&stat->total,amount); From 47bf3a5b1b5dd1f85a1ff75bb046f9f8e6dfcdb1 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:06:21 -0700 Subject: [PATCH 258/352] potential fix for sporadic assertion failure on random returning 0 (issue #1039) --- src/random.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/random.c b/src/random.c index 4fc8b2f8..f17698ba 100644 --- a/src/random.c +++ b/src/random.c @@ -143,13 +143,17 @@ void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { mi_assert_internal(mi_random_is_initialized(ctx)); - #if MI_INTPTR_SIZE <= 4 - return chacha_next32(ctx); - #elif MI_INTPTR_SIZE == 8 - return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); - #else - # error "define mi_random_next for this platform" - #endif + uintptr_t r; + do { + #if MI_INTPTR_SIZE <= 4 + r = chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + r = (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif + } while (r==0); + return r; } @@ -163,7 +167,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { + for (uintptr_t i = 0; i < max || x==0; i++, x++) { x = _mi_random_shuffle(x); } mi_assert_internal(x != 0); @@ -179,7 +183,7 @@ static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); } #endif uintptr_t x = _mi_os_random_weak(0); - for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. + for (size_t i = 0; i < 8; i++, x++) { // key is eight 32-bit words. 
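Patch 258's fix, visible above, simply re-draws whenever the generator happens to return zero, since a zero value doubles as an "uninitialized" marker elsewhere. Below is a tiny standalone sketch of that retry pattern; the xorshift generator is a stand-in chosen for this example, not mimalloc's chacha-based one.

#include <stdint.h>
#include <stdio.h>

// Stand-in PRNG (xorshift64*); mimalloc uses a chacha-based generator instead.
static uint64_t prng_state = 0x9E3779B97F4A7C15ULL;
static uint64_t prng_next(void) {
  uint64_t x = prng_state;
  x ^= x >> 12; x ^= x << 25; x ^= x >> 27;
  prng_state = x;
  return x * 0x2545F4914F6CDD1DULL;
}

// Retry until the value is non-zero, mirroring the loop added in this patch.
static uint64_t random_nonzero(void) {
  uint64_t r;
  do { r = prng_next(); } while (r == 0);
  return r;
}

int main(void) {
  printf("%016llx\n", (unsigned long long)random_nonzero());
  return 0;
}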
x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } From 1aa88e0d9ad631ce7ed737a41aca873a61534939 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:11:38 -0700 Subject: [PATCH 259/352] try to fix pipeline trigger --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a803cd15..c4dc1627 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -7,9 +7,9 @@ trigger: branches: include: - master - - dev - - dev2 - dev3 + - dev2 + - dev tags: include: - v* From afbc581f8dfdc92f69faa2ec57e18128c54fcd44 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:16:10 -0700 Subject: [PATCH 260/352] add Windows x86 to the build pipeline --- azure-pipelines.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c4dc1627..25d4a6e0 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -34,6 +34,14 @@ jobs: BuildType: secure cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON MSBuildConfiguration: Release + Debug x86: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -A Win32 + MSBuildConfiguration: Debug + Release x86: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 + MSBuildConfiguration: Release steps: - task: CMake@1 inputs: From b2dcab58f7d1696795bae0e5bf33ffc229662ee9 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:20:36 -0700 Subject: [PATCH 261/352] fix assertion failure (issue #1031) --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index e2730b7f..75f8dacb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -523,7 +523,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se void _mi_segments_collect(bool force, mi_segments_tld_t* tld) { mi_pages_try_purge(force,tld); #if MI_DEBUG>=2 - if (!_mi_is_main_thread()) { + if (!_mi_is_main_thread() && force) { mi_assert_internal(tld->pages_purge.first == NULL); mi_assert_internal(tld->pages_purge.last == NULL); } From 7eafaa968598fc6b1261103f0f53b0db2bc56139 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:22:02 -0700 Subject: [PATCH 262/352] fix visibility warning (issue #1031) --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index 215eed20..8a48ae5e 100644 --- a/src/init.c +++ b/src/init.c @@ -95,7 +95,7 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- -mi_decl_hidden mi_decl_cache_align const mi_heap_t _mi_heap_empty = { +mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, MI_ATOMIC_VAR_INIT(NULL), 0, // tid From 660d749d77822e54b77acecf82f1aa8f348625ae Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:29:29 -0700 Subject: [PATCH 263/352] do not default to MI_DEBUG=2 in release mode builds even when NDEBUG is not defined by defininig MI_BUILD_RELEASE (issue #1037) --- CMakeLists.txt | 4 +++- include/mimalloc/types.h | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d780fa1..2b1292cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -532,7 +532,9 @@ if(MI_TRACK_ASAN) endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) list(APPEND mi_defines "MI_CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE_LC}") #todo: multi-config project needs $ ? 
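The MI_BUILD_RELEASE define introduced by patch 263 only matters through the default-selection block in `types.h`. A throwaway translation unit like the one below (purely illustrative, not part of the library) makes the effect easy to check: compile it once plain and once with `-DMI_BUILD_RELEASE` or `-DNDEBUG`, and the reported level changes from 2 to 0.

#include <stdio.h>

// Mirror of the default-selection logic: a release-style build (MI_BUILD_RELEASE
// or NDEBUG defined) gets MI_DEBUG 0, everything else gets the checked level 2.
#if !defined(MI_DEBUG)
  #if defined(MI_BUILD_RELEASE) || defined(NDEBUG)
    #define MI_DEBUG 0
  #else
    #define MI_DEBUG 2
  #endif
#endif

int main(void) {
  printf("effective MI_DEBUG = %d\n", MI_DEBUG);
  return 0;
}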
-if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$")) +if(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$") + list(APPEND mi_defines MI_BUILD_RELEASE) +else() set(mi_libname "${mi_libname}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version endif() diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 9f743149..5bcdb07f 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -66,10 +66,10 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_DEBUG 2 // + internal assertion checks // #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) #if !defined(MI_DEBUG) -#if !defined(NDEBUG) || defined(_DEBUG) -#define MI_DEBUG 2 -#else +#if defined(MI_BUILD_RELEASE) || defined(NDEBUG) #define MI_DEBUG 0 +#else +#define MI_DEBUG 2 #endif #endif From 891f9f4cf6afbd213d7260b880795af523646c11 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:38:19 -0700 Subject: [PATCH 264/352] fix conflict marker (issue #1038) --- SECURITY.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 0ad51aa0..b3c89efc 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -28,11 +28,7 @@ Please include the requested information listed below (as much as you can provid This information will help us triage your report more quickly. -<<<<<<< HEAD -If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. -======= If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. ->>>>>>> dev-slice ## Preferred Languages From cf08c27d2b6b82232dcfc0882642b5983efd95f9 Mon Sep 17 00:00:00 2001 From: Jo Bates <29763794+jbatez@users.noreply.github.com> Date: Thu, 20 Mar 2025 11:24:59 -0700 Subject: [PATCH 265/352] support MI_OPT_ARCH when using CMAKE_OSX_ARCHITECTURES with non-Apple Clang --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b1292cc..b7154b20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,7 +434,7 @@ endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") if(MI_OPT_ARCH) - if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) + if(APPLE AND CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a") endif() From 01ee3568c1a2d82779887577e4427b8d65df47ce Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 16:19:54 -0700 Subject: [PATCH 266/352] name anonymous mmap address ranges for debugging on Linux (based on PR #1032 by @zhuker) --- src/prim/unix/prim.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 8e3180e6..994dbb93 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -205,14 +205,24 @@ static int unix_madvise(void* addr, size_t size, int advice) { return (res==0 ? 
0 : errno); } -static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { +static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { + void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); + #if (defined(__linux__) || defined(__ANDROID__)) + if (p!=MAP_FAILED && p!=NULL) { + prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); + } + #endif + return p; +} + +static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { MI_UNUSED(try_alignment); void* p = NULL; #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + p = unix_mmap_prim(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { int err = errno; _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); @@ -223,7 +233,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p } #elif defined(MAP_ALIGN) // Solaris if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + p = unix_mmap_prim((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd); // addr parameter is the required alignment if (p!=MAP_FAILED) return p; // fall back to regular mmap } @@ -233,7 +243,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { - p = mmap(hint, size, protect_flags, flags, fd, 0); + p = unix_mmap_prim(hint, size, protect_flags, flags, fd); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? 
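Patch 266 labels every mapping mimalloc creates, so the regions show up as `[anon:mimalloc]` in `/proc/<pid>/maps`. The standalone program below demonstrates the same `prctl` call outside of mimalloc; it assumes a Linux kernel built with `CONFIG_ANON_VMA_NAME` (5.17 or later), and the region name `demo-region` is arbitrary. On older kernels the call fails harmlessly.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <linux/prctl.h>   // PR_SET_VMA, PR_SET_VMA_ANON_NAME

int main(void) {
  const size_t size = 1 << 20;
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { perror("mmap"); return 1; }
  #if defined(PR_SET_VMA) && defined(PR_SET_VMA_ANON_NAME)
  // Name the mapping; ignore failure on kernels without CONFIG_ANON_VMA_NAME.
  if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "demo-region") != 0) {
    perror("prctl(PR_SET_VMA)");
  }
  #endif
  printf("mapped %zu bytes at %p; check /proc/self/maps for [anon:demo-region]\n", size, p);
  getchar();   // pause so the mapping can be inspected from another terminal
  munmap(p, size);
  return 0;
}

While the program waits on stdin, `grep demo-region /proc/<pid>/maps` from another shell shows the named region, which is the same diagnostic benefit the patch brings to mimalloc's own allocations.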
int err = 0; @@ -248,7 +258,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p } #endif // regular mmap - p = mmap(addr, size, protect_flags, flags, fd, 0); + p = unix_mmap_prim(addr, size, protect_flags, flags, fd); if (p!=MAP_FAILED) return p; // failed to allocate return NULL; @@ -319,7 +329,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) { mi_huge_pages_available = false; // don't try huge 1GiB pages again @@ -327,7 +337,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); } lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); } #endif if (large_only) return p; @@ -340,7 +350,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec // regular allocation if (p == NULL) { *is_large = false; - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, flags, fd); if (p != NULL) { #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead From 6ed451c555da0725bd660440a584188370f46b8b Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 16:48:50 -0700 Subject: [PATCH 267/352] fix linux compile by including linux/prctl.h --- src/prim/unix/prim.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 994dbb93..32004fe4 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -31,11 +31,12 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(__linux__) #include + #include // PR_SET_VMA //#if defined(MI_NO_THP) - #include // THP disable + #include // THP disable //#endif #if defined(__GLIBC__) - #include // linux mmap flags + #include // linux mmap flags #else #include #endif @@ -207,7 +208,7 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); - #if (defined(__linux__) || defined(__ANDROID__)) + #if (defined(__linux__) && defined(PR_SET_VMA)) if (p!=MAP_FAILED && p!=NULL) { prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); } From 02607f2b8d6fa70dfa632d3851930dadeeb5079f Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 17:22:36 -0700 Subject: [PATCH 268/352] reduce test sizes for 32-bit --- test/test-api.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test-api.c b/test/test-api.c index 15484544..6f5d6722 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -203,7 +203,11 @@ int main(void) { CHECK_BODY("malloc-aligned9") { // test large alignments bool ok = true; void* p[8]; - size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 }; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, + #if SIZE_MAX > UINT32_MAX + 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, + #endif + 0 }; for (int i = 0; i < 28 && ok; i++) { int align = (1 << i); for (int j = 0; j < 8 && ok; j++) { From d48bafe2bb63120c1327fe61a13aafd893c97760 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 19:21:41 -0700 Subject: [PATCH 269/352] print statistics nicer --- src/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index 6a480816..70f16ef3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -215,7 +215,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "unit ", "total# "); + _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "block ", "total# "); } #if MI_STAT>1 @@ -284,10 +284,10 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) // and print using that mi_print_header(out,arg); #if MI_STAT>1 - mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "normal",out,arg); + mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "bin",out,arg); #endif #if MI_STAT - mi_stat_print(&stats->malloc_normal, "normal", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); + mi_stat_print(&stats->malloc_normal, "binned", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 
1 : -1), out, arg); mi_stat_count_t total = { 0,0,0 }; mi_stat_count_add_mt(&total, &stats->malloc_normal); @@ -295,7 +295,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print_ex(&total, "total", 1, out, arg, ""); #endif #if MI_STAT>1 - mi_stat_print_ex(&stats->malloc_requested, "malloc req", 1, out, arg, ""); + mi_stat_peak_print(&stats->malloc_requested, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); From a077311a5ec418e2e11c5cb99b82a41c188045b3 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 19:40:44 -0700 Subject: [PATCH 270/352] improve tracking of malloc_requested count --- include/mimalloc/types.h | 5 ++++- src/alloc-aligned.c | 3 ++- src/alloc.c | 17 ++++------------- src/free.c | 14 ++++++-------- src/heap.c | 10 +++++----- src/stats.c | 28 +++++++++++++++++++++++++++- 6 files changed, 48 insertions(+), 29 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 5bcdb07f..ab697f23 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -625,22 +625,25 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line // add to stat keeping track of the peak void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount); // counters can just be increased void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #if (MI_STAT) #define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) #define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) +#define mi_stat_adjust_decrease(stat,amount) _mi_stat_adjust_decrease( &(stat), amount) #define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) #else #define mi_stat_increase(stat,amount) ((void)0) #define mi_stat_decrease(stat,amount) ((void)0) +#define mi_stat_adjust_decrease(stat,amount) ((void)0) #define mi_stat_counter_increase(stat,amount) ((void)0) #endif #define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) - +#define mi_heap_stat_adjust_decrease(heap,stat,amount) mi_stat_adjust_decrease( (heap)->tld->stats.stat, amount) #endif diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index d0e691b3..e28cb0de 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -191,10 +191,11 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; if mi_likely(is_aligned) { + void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen #if MI_STAT>1 + mi_heap_stat_adjust_decrease(heap, malloc_requested, padsize); mi_heap_stat_increase(heap, malloc_requested, size); #endif - void* p = (zero ? 
_mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_track_malloc(p,size,zero); diff --git a/src/alloc.c b/src/alloc.c index 15867315..0c4e4391 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -30,6 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file // Note: in release mode the (inlined) routine is about 7 instructions with a single test. extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { + mi_assert_internal(size >= MI_PADDING_SIZE); mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); // check the free list @@ -88,6 +89,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ #if (MI_STAT>1) const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap, malloc_bins[bin], 1); + mi_heap_stat_increase(heap, malloc_requested, size - MI_PADDING_SIZE); #endif } #endif @@ -146,12 +148,6 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); - #if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); - } - #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); @@ -188,12 +184,6 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_track_malloc(p,size,zero); - #if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); - } - #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); @@ -666,7 +656,8 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo if (p != NULL) { if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); + mi_heap_stat_adjust_decrease(heap, malloc_requested, req_size); + mi_heap_stat_increase(heap, malloc_requested, size); #endif _mi_stat_counter_increase(&heap->tld->stats.malloc_guarded_count, 1); } diff --git a/src/free.c b/src/free.c index a1732e8c..7e529530 100644 --- a/src/free.c +++ b/src/free.c @@ -514,20 +514,18 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) MI_UNUSED(block); -#endif mi_heap_t* const heap = mi_heap_get_default(); const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) - const size_t usize = mi_page_usable_size_of(page, block); - mi_heap_stat_decrease(heap, malloc_requested, usize); -#endif + // #if (MI_STAT>1) + // const size_t usize = mi_page_usable_size_of(page, block); + // mi_heap_stat_decrease(heap, malloc_requested, usize); + // #endif if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_normal, bsize); -#if (MI_STAT > 1) + #if (MI_STAT 
> 1) mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], 1); -#endif + #endif } else { const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc diff --git a/src/heap.c b/src/heap.c index 7c235a7b..0ea9a2ff 100644 --- a/src/heap.c +++ b/src/heap.c @@ -331,17 +331,17 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ if (bsize > MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_huge, bsize); } -#if (MI_STAT) + #if (MI_STAT>0) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_normal, bsize * inuse); -#if (MI_STAT>1) + #if (MI_STAT>1) mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], inuse); -#endif + #endif } - mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... -#endif + // mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... + #endif /// pretend it is all free now mi_assert_internal(mi_page_thread_free(page) == NULL); diff --git a/src/stats.c b/src/stats.c index 70f16ef3..07ce7d16 100644 --- a/src/stats.c +++ b/src/stats.c @@ -62,6 +62,25 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { } +static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + if mi_unlikely(mi_is_in_main(stat)) + { + // adjust atomically + mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_addi64_relaxed(&stat->total,amount); + } + else { + // adjust local + stat->current += amount; + stat->total += amount; + } +} + +void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust(stat, -((int64_t)amount)); +} + // must be thread safe as it is called from stats_merge static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) { @@ -199,6 +218,13 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int _mi_fprintf(out, arg, "\n"); } +static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + _mi_fprintf(out, arg, "%10s:", msg); + _mi_fprintf(out, arg, "%12s", " "); // no peak + mi_print_amount(stat->total, unit, out, arg); + _mi_fprintf(out, arg, "\n"); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); @@ -295,7 +321,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print_ex(&total, "total", 1, out, arg, ""); #endif #if MI_STAT>1 - mi_stat_peak_print(&stats->malloc_requested, "malloc req", 1, out, arg); + mi_stat_total_print(&stats->malloc_requested, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); From 26b792d93b4e8f389a5c724feeabb86038b39e53 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 20:07:16 -0700 Subject: [PATCH 271/352] fix aligned malloc_requested statistic --- src/alloc-aligned.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index e28cb0de..8d2bde74 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -192,10 +192,6 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t if mi_likely(is_aligned) { void* p = (zero ? 
_mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen - #if MI_STAT>1 - mi_heap_stat_adjust_decrease(heap, malloc_requested, padsize); - mi_heap_stat_increase(heap, malloc_requested, size); - #endif mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_track_malloc(p,size,zero); From 34cc5c8fd9e84fbfdfa45ed5db5b09f74a448a3b Mon Sep 17 00:00:00 2001 From: Peiyuan Song Date: Mon, 24 Mar 2025 09:39:42 +0800 Subject: [PATCH 272/352] remove the `lib` prefix when enabling mimalloc-redirect for mingw --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7154b20..283af66d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -594,6 +594,9 @@ if(MI_BUILD_SHARED) # install(FILES "$/${mi_libname}.dll.pdb" DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if(WIN32 AND MI_WIN_REDIRECT) + if(MINGW) + set_property(TARGET mimalloc PROPERTY PREFIX "") + endif() # On windows, link and copy the mimalloc redirection dll too. if(CMAKE_GENERATOR_PLATFORM STREQUAL "arm64ec") set(MIMALLOC_REDIRECT_SUFFIX "-arm64ec") From 797ca19ba93bb92f9c7c97923aa3e43485cbb3de Mon Sep 17 00:00:00 2001 From: Maksim Bondarenkov <119937608+ognevny@users.noreply.github.com> Date: Mon, 24 Mar 2025 08:35:15 +0300 Subject: [PATCH 273/352] cmake: don't change properties of import lib on Windows/MinGW CMake handles import lib for it automatically, and using `.dll.lib` extension is MSVC-specific hack --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7154b20..46435eca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -584,7 +584,7 @@ if(MI_BUILD_SHARED) install(TARGETS mimalloc EXPORT mimalloc ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) - if(WIN32) + if(WIN32 AND NOT MINGW) # On windows, the import library name for the dll would clash with the static mimalloc.lib library # so we postfix the dll import library with `.dll.lib` (and also the .pdb debug file) set_property(TARGET mimalloc PROPERTY ARCHIVE_OUTPUT_NAME "${mi_libname}.dll" ) From 632eab958bb91fb8bd273efe58995023e5087aaa Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 25 Mar 2025 16:02:29 -0700 Subject: [PATCH 274/352] fix for atomic_yield on arm 32-bit, issue #1046 --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 2984f50f..c0425f67 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -380,7 +380,7 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("wfe"); } -#elif (defined(__arm__) && __ARM_ARCH__ >= 7) +#elif (defined(__arm__) && __ARM_ARCH >= 7) static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } From 23fbee7ec69e9bf336a236ad7e85e0bbe41cfb5d Mon Sep 17 00:00:00 2001 From: Vincent Fazio Date: Fri, 28 Mar 2025 07:58:49 -0500 Subject: [PATCH 275/352] atomic: fix mi_atomic_yield for big-endian arm32 Previously, `mi_atomic_yield` would not be defined on ARM32 big-endian architectures if they were pre-ARMv7. Rework the #ifdef guard to be more readable and collapse the ARM guards so both little and big endian are handled via the same mechanism. 
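As background for this change: `yield`, `pause`, and `wfe` are only politeness hints inside a busy-wait loop and do not order memory by themselves. Below is a hedged sketch of how such a hint is typically consumed, assuming a GCC/Clang toolchain; the `cpu_relax` name and the tiny spin lock are inventions of this example, not mimalloc code, and the architecture selection merely echoes the #if structure this patch reworks.

#include <stdatomic.h>
#include <stdio.h>

// Busy-wait "relax" hint for the spinning core; falls back to a compiler barrier.
static inline void cpu_relax(void) {
  #if defined(__x86_64__) || defined(__i386__)
  __asm__ volatile ("pause" ::: "memory");
  #elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
  __asm__ volatile ("yield" ::: "memory");
  #else
  __asm__ volatile ("" ::: "memory");   // no dedicated hint: compiler barrier only
  #endif
}

static atomic_flag lock = ATOMIC_FLAG_INIT;

static void spin_lock(void) {
  while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) {
    cpu_relax();   // tell the core we are just spinning, nothing more
  }
}

static void spin_unlock(void) {
  atomic_flag_clear_explicit(&lock, memory_order_release);
}

int main(void) {
  spin_lock();
  puts("inside the lock");
  spin_unlock();
  return 0;
}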
Now, ARMv7+ will utilize `yield` while older targets will use `nop` regardless of endianness. Signed-off-by: Vincent Fazio --- include/mimalloc/atomic.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index c0425f67..6289dc54 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -370,8 +370,9 @@ static inline void mi_atomic_yield(void) { _mm_pause(); } #elif (defined(__GNUC__) || defined(__clang__)) && \ - (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ - defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) + (defined(__x86_64__) || defined(__i386__) || \ + defined(__aarch64__) || defined(__arm__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); @@ -380,10 +381,16 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("wfe"); } -#elif (defined(__arm__) && __ARM_ARCH >= 7) +#elif defined(__arm__) +#if __ARM_ARCH >= 7 static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } +#else +static inline void mi_atomic_yield(void) { + __asm__ volatile ("nop" ::: "memory"); +} +#endif #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) #ifdef __APPLE__ static inline void mi_atomic_yield(void) { @@ -394,10 +401,6 @@ static inline void mi_atomic_yield(void) { __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } #endif -#elif defined(__armel__) || defined(__ARMEL__) -static inline void mi_atomic_yield(void) { - __asm__ volatile ("nop" ::: "memory"); -} #endif #elif defined(__sun) // Fallback for other archs From 1052c30f034017c67e5eea0ab45e032feb1e4e1e Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 13:09:24 -0700 Subject: [PATCH 276/352] fix parenthesis in #if condition --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 6289dc54..39ff5c90 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -372,7 +372,7 @@ static inline void mi_atomic_yield(void) { #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || \ defined(__aarch64__) || defined(__arm__) || \ - defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) + defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); From b843fead226505f8aaba05724880a3e99cd500c8 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 13:11:37 -0700 Subject: [PATCH 277/352] update mimalloc-redirect to v1.3.3; fix issue #1049 --- bin/mimalloc-redirect-arm64.dll | Bin 61440 -> 61440 bytes bin/mimalloc-redirect-arm64.lib | Bin 2976 -> 2976 bytes bin/mimalloc-redirect-arm64ec.dll | Bin 104960 -> 104960 bytes bin/mimalloc-redirect-arm64ec.lib | Bin 3308 -> 3308 bytes bin/mimalloc-redirect.dll | Bin 59904 -> 60416 bytes bin/mimalloc-redirect.lib | Bin 2874 -> 2874 bytes bin/mimalloc-redirect32.dll | Bin 38912 -> 38912 bytes bin/mimalloc-redirect32.lib | Bin 2928 -> 2928 bytes 8 files changed, 0 
insertions(+), 0 deletions(-) mode change 100644 => 100755 bin/mimalloc-redirect-arm64.dll mode change 100644 => 100755 bin/mimalloc-redirect-arm64.lib mode change 100644 => 100755 bin/mimalloc-redirect-arm64ec.dll mode change 100644 => 100755 bin/mimalloc-redirect-arm64ec.lib mode change 100644 => 100755 bin/mimalloc-redirect.dll mode change 100644 => 100755 bin/mimalloc-redirect.lib mode change 100644 => 100755 bin/mimalloc-redirect32.dll mode change 100644 => 100755 bin/mimalloc-redirect32.lib diff --git a/bin/mimalloc-redirect-arm64.dll b/bin/mimalloc-redirect-arm64.dll old mode 100644 new mode 100755 index e636028591dd8bcf7614a6cd3202121c1913df81..27172d2c0d5f24e2b1a4cccc931bbf530c20860c GIT binary patch delta 7098 zcmbVRe^gslwmv5WO>C*qmXelM5`hwEX+vuzXlujtS84@QT3be~4FxoG3Z+ODtd*ck zAFe)UxE{x{{IN{Y)8m;i0S%%e_%F;FSKxZ7@JasNs-%EY34%N{i9g+8)dr!1w zmj67~y7xQ#JA3c5_c{BVdvkHiac()Tbv>1(4V&y=UBCa+H)`_Uh+Sv@YB}?oB3s@# z4J^K}?#!zS=AAjC;P2m1ajhbkRF%89tzdAVBX*&~*L@RDPqX_I zU!k9cu1R`RJ0AmH5&bCEfxVyHpX`00AQGp`!So-IHDovZjq0N8`9<4klzqJDPU>Z= zQc`q@>>oEQ=OrNDX^TF@veVMnWyX=`B5}lz`5QGywNW(!wIB!zBC^-^eS~hz z7PtJCHK!ToVN_m8W?n(araNu9QBW>If;zfFLMl8y{AyeDlUr@k2_&W1HM0YmJLp8HKXVV&RB70}oF&Ydb(D^==d+&BoK0Y< z<^sBt-DOTIphow5atD>3{QJyFzL6v4q*0d$e1J4Yz(>+Zl*c2VC6Yr+o}-0aKVE8N zLuT__G$zt)QG)z)*+p|hB8bQeZb2ovoJyP7(d?V%oK2AUyz+C01+#C`lo9rE`eHVd zoj@OE2|2eWo4=bm$qR=MW;XHhqlrOve#P zP9aexnDazB_l*dCEop2qBKUj+4?eO0p`;-Aaw176&hux9^a^>nU*-c z65;f*A=P&(?6OEyPPIkfoEEXc7wk$sR z(8`Rt2%n!LgxZFhxWZg4vNDNTwrDle=cdwHPP2mC)WrwkU$>s@LLoFpaNy{U5Q!V! zr)V_ireUGHx!I|2E49hzaQfJ)BIdcpN^H5DDia=3&w5u|?=$ z7X-~To^R3beer$s$u1&IY$DPcz67z(*oKQ>Y4`^7U@P&>syl9b{{lF^*CVOyi&ZJ} zu>h)M8zShPKK2`ymXOR=uD+N4iaovhChhP7NyCO$ub>_5;_6%Jb1d7^qAi;*{g%CM zv1?5Wq*ocu+e?etPjO$vPUmG&Gy9J`hRUj5ldie^5BBVuRaD1@*PK}Y*j(lO%86i- zBB?k$aFX7R*z96|lI9fYiECEbK7$KI%gU8rqs9@Uz(QV09pO*RFehZTgj3X}m(`h_%^;t;e_EUQ_}W4`D<(@+op+UHF1V z{wCJsdJ?@b^U8a?V$!Jof65W&h$6#=SJ4Jk6#PYY*&6vn0Z+Ung@f-2#3L`j;=PM) z#}7)(s#09+wSvu>Z@vs&F8E12UCTCXY>6LEkv^Ve?`*8od^H)m&Gu>>y^USkw3;4Z zrp+hlUKZGF)DAC``a)xyJrqx=x~;3V!#7CztZQqA#^hq}Zv8#j7q;CR-<&3Weu@2g z+by(ZeRx)go zUSdhxcOb%j+m|k+H%dpom^sOx3Es;yC$X(dUI{(3eZK~~^5PD&)@zX7VT#PVyMg#s4bUnW7|vaf!I}I)D? 
z758__% zZADAmX7Im7B6 z>nqFa_f*x@A1ymnR9Stv4pVezd`bPGokuI{8n@IuR8@Dlrn<7Geot9lRoQ_N)k2o6^9P3!X;9Ie?(JU zzo)Y9!K%YG#Yd|Tpv7ux?hcLCjB4p5TYPjQexiAstdvO)ZKQH&?4gG#)rC$!nopU# z(T3-Z^thGk{z}|@`dgUkXg&?9-L;SEs7E1M6ha>qS|bWMu9D+|oK#42sQJl!s>flW z_%pHzcRnR5X%(bHAss5|5~Q1xq1qNJ)wc<1P|@?LdZU7zRLF6a)Ll}CF|*;P@{v{E z&oQ=5kWPiPs^p`BL?%xkruyfE=uyOFK_Lzb91*BYbLgX&?xCg+1r-rEd8G>UA{b{c zHIXmXh;#xC0>y$6S7_oTs96MU6X=+wW>?hWD>%(AvQ+9;2z{&2^MsE45@`$yaYPYA zrEM)VwD6pZE;D|mhG7wC6L>{JrGjz^ti4j@hlJdMZWZW@$rb7dlWIa?L5~WI2pkk@ zhS1pSrPTO0)hJP5Os*ALql&XUtLU~1j7{FuLDf7aJ7exb!+)|N#}k>O$OdXY!-obfw6GIq2_+9y-Uyy z0^PIJL?yKf+9xnPOHEe2$%38~sQX3@Lg!MkO;OVu1#Jyo>N`wL4k5Y(x&_8UB_Vna zq51?pA}}&b&8Vockyj1Wjs@ON7hC?W4r>z_bLdgJ+2HF2T8k!XH`X(%^qAU&fluI| zK+6NF!@*GDzagSgLCXTCVtBu*=2q1br&Q4>&?3+&yhRkX#r7C2FcuRI#okgXS*|rT zh@Mt~!I*`ZUb{L`qk@hIoD>*y7&{%JYFGw=7J*kUmdP$?r$D#BHZf?eGH9|#&|!hf zV4-Jv?^Pm~W9(%y6#^5hRJl!!+%9OBz*d0{p*8}wSLWqST}zeP*6CD?C01LldW*?M z(Qj5amqm!SS!ThStIZsu->RB9g;+bwEOC~ZTl8C0vsNMcW|@^9zIwE<=(j0mxuZgq zXPM2;ZHo7Y@=ola**d*C4F-X+^k?T~4w*i@lV)3mnthh~?0ia9b(c_Un58~DkJeDq zUq-cByg<5H{HOnt#NP#jcttFTm*XNP{2F2}e|oa153CQbkk!-~XUHdHJ3jb2fgbQO z@Yht1^Ar%WkADchg^)_(q%Pn^;?$JhN=Olvsc$_YJ?M9AAmnSH{Wd~gMt=nOH00Wi zgxm%HBQ`=dz+P8K$TGZ-nF~o1cc^Ivu|+FuWPFbPCFF*!gmghR?jQuOO0*4l6o!d+ z;0p#ECKeI$B;+<=HZTk{Y2-NmuA2$j4CDc~0M`M-z~eyMP9z%Vj0^52l%u;@L0e^lX}v}qCz?!bT_9DHCqFdJA4V6#cQESGcE|6Je<(rXh8k_6Q_NsBmvc~5_)*0> zuscZ?5c`NULSeL)kYr3@1o$*E#AEyn4bA)fGHnX?jPww{wb$0$&>QRHakXG>>m zr>AqOi=5J(GMwVBTe`Vh(&O~I{0;u`Ub%O&*AXZUI0LRgZD6E-w12!`?vM1tsZ2;I zz~-@g98I25kJB^g4SPqumJVx&t;62o=#?qK&| zcVdsB$K2E6Z}qqNJ$|2`^y&H%`wV@?KKvFT7B~+3NBradh=0;=?X&ec`kZ~PzF=T5 zFcKIK$N}?!WxzJz7%0W!Qj%eYfyd+X40=X9hIV7SrQO!v+R@hG>lp0NbtQHgyDVKO HmvQ=kBa@Zy delta 6972 zcmb_he{@q-p8wvLWK7gzYQcsU=!-3FTKSRkLkft#%CCTep@4M!MM|l(43vn~Q{-S1 zJUVeWshBe?(RwT;C6R)HWi%-Ge=Ez)rK8pzxTZx zh3xUa?Kv->`~BSS&-?4$q@I+{lhXNbArtbNwT`pbz4VdXKJ{wu`S`QUE6=KO=GDJL zY&-wuE6=Go^@?A`8Lw*ig(^3+HMgp^NsG>Xdrbc8F;mhD@IC+x(uYXVY^NTZxxY98 zn*iYO0L-D2!U8=u9J004w5X${vjervbc$4wKhfDj6%lDYema#l;Utmm#%J~S-+p#o zwi-O|n2woVYz4>|y_K?wm`)hM(_ka8GQv32DI+lDBCx~8bg$H>FX?X-yp_tE`nv^k zmX=&qz7-8>P^sqF)i1W8gKBhIO|Cv+B#D)mJ2e_W?#F)HS$+0{OP7js!SWKKhpwJQ z16Lhk&W*3(=X=I-{Jdb?H4g)wzfQ^;iRo>0HvWGMO_enm8@o@iIMt~B7gWbw-mXUV zyd3Eprb;Y?{@GOX04v!lO|1pkVF8%=J(;OPmDYWKRo`Q$9J=}|1Ci;2=9kD>95Ctmt~*Cd7$skip{d_Tbb4wYjF47wFd<7lM} zIAKrHEybb<1rdd#1$65*PWo1HF=HfTcB_nXv*q^!k(F;sdWqnzQhMj~6H-LC%o`NE zHS~YxRk9_Jx=YG#pw(sN%NOh5xwH3yxv5Gv0{978DyOcIvXmaX@d?sNN%^nz-kI{J z^h|j<8K)nV-(o#IQ%;vd7We2(8=I|3O`a^5vP=dmT~+ZOsiB4QD;9cZ$?3(H>5gjh zM@3Th++r#F;c6*6iAAY)gzlMt2We7b^Y13)UTUcFlWp{cswV{93|eYiNp{ekwzBms zOsbd21=qoIMD)CqI!EN=Utc=O_H()!ju_2|``}0#`E(g%Cs8-`Cdh1>Nj=jy$dA$? zo9$X`OqNMmz+uqQNn58GiS$iqQH~bPL@uOl)yuBZ&5+sf(n$3((o8?7t|DF3P_vp; z()yYreJz3K=(US1)LpZT5E`!e*|k_p+&CBg;)K$=o2t9kn#M04h5m~=kWb%L|D3!7ok4pWQ)|%96@YIBhM@O`r*dUd%M;q7*%qxZC61#Zm zB-=*(fojJ@pZB^*A z3Votq7fuk%kL~g%1MVq2ql_}hOt~4A7nA#`bzw2iHyY9{3(LqlwdGwnt9VRZ-WQMl zR?T+u2~c;^oAmU;UBpgJi>mOmcF{_@e{(@s)IFvaFE!58IMWto=1*t*fdTM>r1e{V(eQ3*Q@==tNo?Y89@Eq&eH~yrAHrF zyodaOjxJuNuP>6#G`sjFa+=P!uO>aT$?nsS7Rv9^PwWo8_j)-&SJv(#_fZ)?C3?QL zirh>Kmrznj!%NBqHvD!8 zDv71FMDNwfL-fmKcaY!F`sGE!pRb{JFMo`zQpT4T>gi*vZWSuOP=2%Ob^|W)i?{g< z^~LgxDf$!Xuuwjwd?UTABW`+n-D2`fI=Suy`7u4XUewpmmETw7^#Ov%{F04}_4V`Q zeY9m`h2Y&sPi*{GWbbjTHjpy89w#D7cm|? 
zR+ayX)^74ypY?o8D@XFVKI#^F^WP0)2GL27z3lzuU4EkK2N+9eOfb{uzC8 z>ki@LP4qvvmIxo4l&`nGAn1eT^7~ZYR!V|&Xq%`f74qwJeA^n`b@Jo1u)%KdR>-rb z=!S;Vi&I659q-irN!!Cj+V+#;S4;@t|mBgHg6SuX~)ln(K5Pv=PErB=TG*WpzC+9r}s4fi9~7r-iGp|u|Tty(E1!& ze~6Z<2P{MNP(WYVyLSF4zUmBcI*99}`EzYlH{t1P8`Z0qCI>r-N9nwlUDVsMqQZ=# z`35{Jg|he5cFAS!d0HDUn3*o|3AUvHmT}y`(Z%sGj?Zumar_64nzit2pq%D4&SE{nK~acl_QLgcnLGY&T)_>Ul` zxxWGYCIWsB5_=wgSW_V2q-*vycg+Wvc~VNu|LEMga}R3mI@VUT_{SyuW6c^2j{^e+ z{!C+8*CHq4GkqhHq>I6g8y%!2?r(8IyDgG~o$S?jUuSy{Qi z_n*L)N4A#yzs>EF`i71Jo7&qt>f0Y~+um`wtsUPlF zJDi6Pb+j~h+}(D#QX*!e@9=C{1iNF{L&0@{Vp|hH(^Vee0nJ% z({%LV+YI8j1g(2SGB~~yYc#G#Smlom*0q!K?~M@;u|s#O_(`VcX7aM7cRRzDZJIYu~Y;|<_ewmq|p zSYPM#7{_FunoUznaXQU$I!`U4shL05vJ^SmIp%`;G_`e{c5-y(sU=i38hF;C1fF*h zQ#bFADDv}IX@ZMM6u%Y<#V@p=xgfl@u@S5Kh#k7X$wi^q}g>`&ylAe&uLKCuPnr@5M?)^ z6TdbWc5_AdgIa)T?84IHoKA3@;8@4i5}I0u(=e@Bn>hx!noW7=)m>z^jnfjxTorD$ z8>_H3@*X!wKgR$cYM(Z0aZZnMOmfUQ2^fX6vR!i^liNhjooW>iB`XX>~oZvy@ z+8lT|9pIP?X2I#-jjM#ysa*F3&6~4T!wD^Q8>bx{-5mW~&8S%4dTB=O#8tTWFhZJR zE?E44<}D|KQPTrus%ImqHFHtS(OfTc@Jcf~?>B2^5*M9$X3jh_7ww<6JS3XOLgj1nb8a@ExQ&%~hxKO!6x-D)sN*MXD_(Z9HrobK{g> zNu8$d;A)L|>iHG8mD>MG>S;z0^?2v>Q^Q9;E@aoo_#A+hhLkRC1qjezd~~wwb;jbQ z^B{5Q#1#PBAVu7WapaA40RKc>`Xv5>bU*U;RRAsEBHf5jqAack*g{bM7Jz<~Eo%V2 zL^R(D@C$GWam1%luDcEG(0@PzScCQ{#JTJWR}Wpxq0m|nuwI`MM%Dv_5ncF&7cZV< z0`UBqRVTyQy!b(g44_vh$k4ms2pWys7=WY1yhw*r3p&hObMCKn)M701sq}Un| zEUl{CCE$6Z5_q6s1u0fKP7P;CS5mLc1aC+3U!V}aN-t2~Kc&=4SC z3|Ip0U}w-1^acIFbWf(o6>1H+L!Qt?I2BHZr^C#7Yaer*ibzpMv@tp|kQhh~*oW$d zq#?(Ub0`vv$3|j_*jNlwnqM(s3)s5?bpa{R9Sj8HL36LASM0U*+IxK=f2cbY3Bf7j zDf21IDe+WipQq2)=kM$8%S5Il#{Sl*JK7obM19fJKzblEFg*Z+kO8o>e4~-*NOUZk zj9Lc8LHnRI=os`5bq_^`MurkY#$ofYIBXxT!x9Np*wDZe@CCX9k-&5iddxjykGr?C U*Vo(Kn+|0{5H^SL&No5+7m$5by#N3J diff --git a/bin/mimalloc-redirect-arm64.lib b/bin/mimalloc-redirect-arm64.lib old mode 100644 new mode 100755 index 11d71ef9ea2aace6dce34616131ab0e5654f45d4..dca80b9b855c57735600d925e005a34cc88b21b9 GIT binary patch delta 97 zcmZ1=zCe6~84JsV@&~6TTd^Eq{5jc{U1PE`s}qow$1c6OkyW0NGssTz&PfIa2ByjO nEYg#YuvLJS8Gw~kuw2+4^=a}ec8Fe;$>%vZpn5-ZtYrcKos1)) delta 97 zcmZ1=zCe6~84C;No=-<6Td^Eq{4v>v8 nvq(=q!d3xRW&l=F!6Ki$_{ro~>=3;wlh1Q-K=pp+Sjz+exUD1A diff --git a/bin/mimalloc-redirect-arm64ec.dll b/bin/mimalloc-redirect-arm64ec.dll old mode 100644 new mode 100755 index f5ee4e4765225f8725fb77330abdecc19bcbe172..a228af39d5a094c6c4c7d257fb6184c7d66b72ca GIT binary patch delta 10127 zcmb7}eOy%4y2sbrjLHz`n7j;vGAIhD>5v*wz6_~}SvtNY-PAK4U&4H8OjAsap{$GP zJ!ht+!70lKIe~n`F!BJ zpJzSmS{Bm&tJ2mnENEXe#|onS@~=< z+4$GLJ&Y#Sk2(_Evn8Ux+0H7F9ju8yL{~GdG?(VF2c=9JU}ehRdTA@&!P2At;xI-Z z)*P8GE#J!edxx=(!c=K_3H$D$$!z?r0?m~z@Gj5IUt5m+$r212nXYotb-3dhk)U!Tbt6#^UC+NZJkT$7@1{fXtT&X5W++3!~0 z9^aPZ$fBeRzK;;{sI(fUrm{JDg5zZJ*MTl48WHe_R(mKtp5 zLei|pNflc8sW~nLT?dAhlcE<1>B&IgR!!WS+Th2Ofz+1G0sh4M>m$}px|@s z98Mjf71VK#O@3@b)LU^5W_o0TwEO|~>SN33ewMK5F8T$Vw`q{{3`L#>Gux&K)X$D? 
[GIT binary patch data elided: base85 deltas for the prebuilt redirect binaries]
diff --git a/bin/mimalloc-redirect-arm64ec.lib b/bin/mimalloc-redirect-arm64ec.lib
old mode 100644
new mode 100755
index b88e8fc11d671ae0aac56ea260ca608dd311cbcf..0ce7743647b158d25a8dd82854e6fc34a0b6b28c
GIT binary patch (delta 98, data elided)
diff --git a/bin/mimalloc-redirect.lib b/bin/mimalloc-redirect.lib
old mode 100644
new mode 100755
index 1d710c011bf7273a3d693106fffe03f4c5b5a1ab..785fa4751353bc3375ee9b77b6942505053f63c1
GIT binary patch (delta 88, data elided)
diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll
old mode 100644
new mode 100755
index 32799ffeb37474d9b899f1c34482a6034580014a..92578f240e430f5baffc000ae3c366db690fb75a
GIT binary patch (delta 5587, data elided)
zNO7_Mq2lX&5h9B|=!!ORKV65A2Z6!hsp!|~w3GYnW;zXfrF?DwfFt=5^d8zj;QBnH z9k4A7I-)Q!*~C4N*;9{zN9Ina$lR;lnaLBj6wE_|j#Q(E1Rwk{(KkU#$u~JDVxbWl zY%@e2uqArC5~#DJXw)UryHXDq^mL`jjN|0(bSfkJ2W+wZeuoH>`OxnY4!Gw0w$z(3 zpt-9gGVp)Ncpq@Z2W@w8&Ot}4ky9+84M)6^g6lm%qOb2pMToPy_Zo@7n~m!1-7h-dz+){0L{3^Z(H!&s~fq{ZH9Aj>iKPR zMWaGzBlm1Z0V|gByBv_1#L4yDW*jRTb$}*D!yUd&F$=H9eE!Y*(&@A^l36GEDhmf; z4GuBfC)If4DqV#drhxC>t`wpdK-HlF*ik3fg?+2mHV7wdN$4Du?cWL80surdg|sql z`)$?a1rxvv1{?2pqy_}^b-M#rr{}|Tn)HKk0Gcj|U#t~lo77^g{UW;|31~;5(Umy9 z0#1G>6e4W&zB2rQN|XpToYUdOL>Ta16u&y>4|Iv^MA%~uTkZ@ zpf)FG!go}Ehp~tERXcfjZOtks8n)HElEW)_ALrg<$@dWV_3zyb0FoCnYj}z+~ zc+x*%%Z8OpW(W;y$xYz>8}!Vtad5+C-rb+zxec{E>n&Wg@&1~BnWr5%V1r(ZLv3>q zG;=L@#3v!>)>)z#<2{;6JiPH`xIbH2X}*)Fw)`3U*%izbeug~?e5t%9zmxF(_wg1v z%e;rZ@|smKV4wk-xahkyP`@qhi4zVwhwJ+jeo4N&BvWxQG4m`C56|Elg=zgjf^JvQ zl?dF0wd82@2s%Jp{1){Do%`%9Y5^za&%*)5F-E$ZP9cZ;N?14lW)K{U#Xn=;_rdl@CM$gR+de}O=YwRPUMG}n;YSP01pF& z)8U`1zstzakoYgp(D=})aQgbiJxyEzJgea zo1`)8A!ENw)WikfpxHpl)t;R|dfY@qdLb_}(a9Z2QWNkn|2Xbf@Vvjrj~MGJXQ>68Ct~OU5?lz~Er2(7R)gO$KCnEN^#E&_dDR4s z>C%`6Z#OxX&wZ2kISqc!v|~B z3P=>>zd`aIg8u>_29U)M{8czW#_-D#iJ^scEg;*lW$WjKRdqxG9})SpuxZ<}QaH8I zhJ&pw-j-@LQDpvH9NczNf15@{G$j0WcxJ6NghTBJ4Q~aGH~AllBpQlecaA(SzPiM;|`;ZSV1eM-QM!P($nSZTr0SZEg0JqpfZG@XPi}o|TVf zJIZm=em8#Cz7jueUx8O{A9?y)yaI(xx^>@PsazQ@Xb3ZZhxm7now>K%ykbP*pi2!i?)@x#%JiiSSddN`7cN62Tv=bGZ_rWR#j-q;x>pDV12H zb^_H(D8@-0wE^y=n5DQM@Og^cDINhFp*TeGG~hVJ0gCy*Kq3k5*n0A z#@5l?Iv6?Ws1-(DI`Y9NKt{#qVKf3V2J-ierT|4jVp*1ERGc7a7^(yb-3vnF%K(`` zoFLP{P7_w-0ptbg0|@|&L++7^^8kiHNT7%<`vg3LnZp+!!BC ze}~=xVL|G4z>~1Kc0g`i+&)^|0iyuOFbIjzPG?O4;z0P9GOUlVh3f#RvJApK-WLk% z0qx8(b(HY{@@5$+W%>XGvW%NB#X&$LAlE>G4v2s^ow!sy185Fp0mKJv4EG$oR4f{S z{6S>KyQH%NA7KzrxS3DwnQ_GJXtMk&uBXUb5z?0qd-6<6~!@EfvE}A_XBCEyiT2rViMKtY$G@Gu5;K+mY2Q#tIM( zTsYUl4L^F72LoyNj>nJl7&vjxwsikcj|@IjiMh@^ZTZNOvOH%+EJ8Af8P+_gJFfdk z=h9E`&5spx>dufN!7b5i@HVaS99vG)LS*XHFs;bbZej1j%xFCmAb<^pRPkc zq>ox}SpQ;0PK3(A@fx<4-N>q0BfE*+#yVJXLDd6q*Ei@rdcHwms5V3l*&U-EzjV&MLFot#ek+3X^?s`N4zF2HEb5Y=mu=yXBqo za)m^pQ_Lu$ilic-JgW>T8&pn}KrK{{t0&b{>KV09)2Hdz1T-dXowi=vpk2_Wv`8n= zUDl21uIa{g?Ru|XW?&7RA#S*695%X5ttOAD-6Szf&DCa^IclCY$INqP&V1H#&T`%o zv|Q}AIITOayR6MtH_0r3&=@>~tcWdVrEE1j$xg8|>?|8&`{e!dfc%{Nyu40PuShA7 zlCKmh$CTHU5#^-PtLjj7s`^x{TBX*hP3k&zT%Ax4YeqC7&6uWD>(RDpy;`ZRS|`)Z z>S8)hcTRs^e^KAP)3D3nHi(R3W4Tdclp3du)5aNN)HrMGH=QvBOlM8!O!a2F*=a@= zzC~aWT11v>mWXBCGG*zoc3OSbK5M@f1rcclq+)ffiLGPn*#w(pQ|yR5B)=>llV6j2 z6m5!jg;&v`2w2WS?H4V>mIkYH3)JqmwpvjLArm+dutHYMN}%>}sC}A^LhU|zpZtvc atlXriQ`i+w#R61MYQGHCliJ}VcK;t|AO9-= diff --git a/bin/mimalloc-redirect32.lib b/bin/mimalloc-redirect32.lib old mode 100644 new mode 100755 index e29272506cf314ede4bff04eb7a1d7949739c43d..bf64978793de2eb34b5ec714fcffa813c7188d88 GIT binary patch delta 106 zcmew$_Caie84Js^=+G;ZtyuOj{+z7It}$7j)sFG!<~UX(MozuEdu|?OU|?XHY|kP+ wc?(+!Scw6UjANCaT*@BHx!9)l;X|kjv&n)SjbIfrlh<=_KvZqM%rTb<0OMsOVgLXD delta 106 zcmew$_Caie84F9)#;yyKtyuOj{+O)Et}$7j)sFGU<~UX(Mow+}6ZelYFfcGpwr7!^ wyoId Date: Fri, 28 Mar 2025 13:28:10 -0700 Subject: [PATCH 278/352] update readme --- readme.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/readme.md b/readme.md index 81f2057e..0727fba9 100644 --- a/readme.md +++ b/readme.md @@ -84,6 +84,9 @@ Enjoy! ### Releases +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3-beta`: Various small bug and build fixes, including: + fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, + fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. 
Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`. Add `mi_thread_set_in_threadpool` and `mi_heap_set_numa_affinity` (v3 only). Add vcpkg portfile. From 8a81fc73c88750085ea40ae4dfb49955bdf93c71 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 14:18:28 -0700 Subject: [PATCH 279/352] update readme --- readme.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readme.md b/readme.md index 0727fba9..87cee98d 100644 --- a/readme.md +++ b/readme.md @@ -12,9 +12,9 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release : `v3.0.2` (beta) (2025-03-06). -Latest v2 release: `v2.2.2` (2025-03-06). -Latest v1 release: `v1.9.2` (2024-03-06). +Latest release : `v3.0.3` (beta) (2025-03-28). +Latest v2 release: `v2.2.3` (2025-03-28). +Latest v1 release: `v1.9.3` (2024-03-28). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: From a78374d816a84b5c72db3842eaada88785cc027e Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 14:22:02 -0700 Subject: [PATCH 280/352] bump version to 1.9.4 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index aeea621f..0446485b 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 9) -set(mi_version_patch 3) +set(mi_version_patch 4) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 8ccfcec3..97cf7856 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 193 // major + 2 digits minor +#define MI_MALLOC_VERSION 194 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From f2539bbe343c7f5c2c862d2d0cd9f1094ef4289e Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 15:37:11 -0700 Subject: [PATCH 281/352] update readme --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 87cee98d..5a495275 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3-beta`: Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. 
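For reference, a minimal compile-time/run-time check against the version bump in patch 280 above. This is only an illustrative sketch: it assumes mimalloc's headers and library are installed, and uses the public `mi_version()` call, which reports the linked library's version in the same `MI_MALLOC_VERSION` encoding (194 for v1.9.4).

#include <stdio.h>
#include <mimalloc.h>

#if MI_MALLOC_VERSION < 194
#error "this example expects the headers of mimalloc v1.9.4 or later"
#endif

int main(void) {
  // compare the header version against the library actually linked at run time
  printf("compiled against mimalloc %d, linked against %d\n", MI_MALLOC_VERSION, mi_version());
  return 0;
}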
From cbab63f6c9935080052dd2648d4480013d6ec2c7 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 30 Mar 2025 16:15:27 -0700 Subject: [PATCH 282/352] fix release build warning (unused mi_stat_total_print) --- src/stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/stats.c b/src/stats.c index 07ce7d16..92bc049c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -218,12 +218,14 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int _mi_fprintf(out, arg, "\n"); } +#if MI_STAT>1 static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { _mi_fprintf(out, arg, "%10s:", msg); _mi_fprintf(out, arg, "%12s", " "); // no peak mi_print_amount(stat->total, unit, out, arg); _mi_fprintf(out, arg, "\n"); } +#endif static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); From 07743454e5a04356144918e32b1e2ce8e80c1726 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 10:57:16 -0700 Subject: [PATCH 283/352] fix dynamic override test on non-windows platforms --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cc7ec5c..879aa668 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -729,8 +729,8 @@ if (MI_BUILD_TESTS) endif() target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) - target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version if(WIN32) + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 $) else() if(APPLE) From a9e94674299479588b742cefa3ebe36bb72cc83b Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 11:00:05 -0700 Subject: [PATCH 284/352] make dynamic override test verbose --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 879aa668..66b24d89 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -731,14 +731,14 @@ if (MI_BUILD_TESTS) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) if(WIN32) target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version - add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 $) + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $) else() if(APPLE) set(LD_PRELOAD "DYLD_INSERT_LIBRARIES") else() set(LD_PRELOAD "LD_PRELOAD") endif() - add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 ${LD_PRELOAD}=$ $) + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 ${LD_PRELOAD}=$ $) endif() endif() endif() From e1110cdb9f64ec319f91fb5b5607bffd3ed76559 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 11:02:10 -0700 Subject: [PATCH 285/352] nicer cmake logic for windows override test --- CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 66b24d89..591ba130 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -724,13 +724,11 @@ if (MI_BUILD_TESTS) if(MI_BUILD_SHARED AND NOT (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) add_executable(mimalloc-test-stress-dynamic test/test-stress.c) 
target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE ${mi_defines} "USE_STD_MALLOC=1") - if(WIN32) - target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") - endif() target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) if(WIN32) - target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version + target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") # link mi_version + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # link mi_version add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $) else() if(APPLE) From 77b622511ad86ff7ba01154dfca86dafe9032b86 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 14:44:46 -0700 Subject: [PATCH 286/352] fix alpine compilation with prctl.h (issue #1059) --- src/prim/unix/prim.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 32004fe4..09aa91b5 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -31,10 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__linux__) #include - #include // PR_SET_VMA - //#if defined(MI_NO_THP) - #include // THP disable - //#endif + #include // THP disable, PR_SET_VMA #if defined(__GLIBC__) #include // linux mmap flags #else @@ -208,7 +205,7 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); - #if (defined(__linux__) && defined(PR_SET_VMA)) + #if defined(__linux__) && defined(PR_SET_VMA) if (p!=MAP_FAILED && p!=NULL) { prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); } From fe47ec625db55a51eb5cc66655a8dfb06e03b6a6 Mon Sep 17 00:00:00 2001 From: Eduard Voronkin Date: Mon, 31 Mar 2025 15:51:26 -0700 Subject: [PATCH 287/352] fix recursion in TLS init on Android --- src/options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 772dfe66..610fe5ba 100644 --- a/src/options.c +++ b/src/options.c @@ -425,14 +425,14 @@ static mi_decl_noinline void mi_recurse_exit_prim(void) { } static bool mi_recurse_enter(void) { - #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return false; #endif return mi_recurse_enter_prim(); } static void mi_recurse_exit(void) { - #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return; #endif mi_recurse_exit_prim(); From 235a0390eec64c54f97e6bcbfc0e24307031812a Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:34:00 -0700 Subject: [PATCH 288/352] refactor numa_node_count --- include/mimalloc/internal.h | 22 +++------------------ src/os.c | 39 ++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5b3e7e23..51fad09c 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -140,9 +140,11 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t void* _mi_os_get_aligned_hint(size_t 
try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); - void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); +int _mi_os_numa_node_count(void); +int _mi_os_numa_node(void); + // arena.c mi_arena_id_t _mi_arena_id_none(void); void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid); @@ -813,24 +815,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) { return x; } -// ------------------------------------------------------------------- -// Optimize numa node access for the common case (= one node) -// ------------------------------------------------------------------- - -int _mi_os_numa_node_get(void); -size_t _mi_os_numa_node_count_get(void); - -extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count; -static inline int _mi_os_numa_node(void) { - if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } - else return _mi_os_numa_node_get(); -} -static inline size_t _mi_os_numa_node_count(void) { - const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); - if mi_likely(count > 0) { return count; } - else return _mi_os_numa_node_count_get(); -} - // ----------------------------------------------------------------------- diff --git a/src/os.c b/src/os.c index 12cc5da3..894e3a45 100644 --- a/src/os.c +++ b/src/os.c @@ -696,34 +696,47 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) { } } + /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count +static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count -size_t _mi_os_numa_node_count_get(void) { - size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); - if (count <= 0) { +int _mi_os_numa_node_count(void) { + size_t count = mi_atomic_load_acquire(&mi_numa_node_count); + if mi_unlikely(count == 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? - if (ncount > 0) { + if (ncount > 0 && ncount < INT_MAX) { count = (size_t)ncount; } else { - count = _mi_prim_numa_node_count(); // or detect dynamically - if (count == 0) count = 1; + const size_t n = _mi_prim_numa_node_count(); // or detect dynamically + if (n == 0 || n > INT_MAX) { count = 1; } + else { count = n; } } - mi_atomic_store_release(&_mi_numa_node_count, count); // save it + mi_atomic_store_release(&mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } - return count; + mi_assert_internal(count > 0 && count <= INT_MAX); + return (int)count; } -int _mi_os_numa_node_get(void) { - size_t numa_count = _mi_os_numa_node_count(); +static int mi_os_numa_node_get(void) { + int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = _mi_prim_numa_node(); + const size_t n = _mi_prim_numa_node(); + int numa_node = (n < INT_MAX ? 
(int)n : 0); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - return (int)numa_node; + return numa_node; +} + +int _mi_os_numa_node(void) { + if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { + return 0; + } + else { + return mi_os_numa_node_get(); + } } From 3c3600f85fe4a6d7f01a9e69db3cada9e63627cb Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:36:01 -0700 Subject: [PATCH 289/352] add atomic_cas_ptr_strong_acq_rel --- include/mimalloc/atomic.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 39ff5c90..e8bac316 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -111,6 +111,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) @@ -120,6 +121,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) @@ -303,6 +305,7 @@ static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) From 5a58df6534cbc8673a655e5772461ef7fd4bcbcb Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:40:30 -0700 Subject: [PATCH 290/352] fix signed compare warning --- src/arena.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index a7c20764..bdae8da1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1007,17 +1007,17 @@ int 
mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (pages == 0) return 0; // pages per numa node - size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); - if (numa_count <= 0) numa_count = 1; + int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count()); + if (numa_count == 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes - for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); + if ((size_t)numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; From d767dbfbb45e2e38502b03dbb57698845899d34f Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:50:36 -0700 Subject: [PATCH 291/352] use C++ compilation with clang-cl (as well as msvc) on Windows --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 591ba130..a3acf83e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,7 +173,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall) endif() -if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") +# force C++ compilation with msvc or clang-cl to use modern C++ atomics +if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL) set(MI_USE_CXX "ON") endif() From 8c99ac1bbd9c692239bbf70c40f9be578d54d394 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 11:16:33 -0700 Subject: [PATCH 292/352] fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 5a495275..70a25fc1 100644 --- a/readme.md +++ b/readme.md @@ -177,7 +177,7 @@ mimalloc is used in various large scale low-latency services and programs, for e Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the -`mimalloc-override-dll` project builds DLL for overriding malloc +`mimalloc-override-dll` project builds a DLL for overriding malloc in the entire program. ## Linux, macOS, BSD, etc. From 9f5a2969b801bee83716f1436fb08bd6c6099b11 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Apr 2025 12:08:26 -0700 Subject: [PATCH 293/352] clarify v3 tag --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 87cee98d..5a495275 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3-beta`: Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. 
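The NUMA refactor in patches 288-290 above sits behind the public reservation call that patch 290 touches. A minimal usage sketch (assuming a system where huge OS pages are available; each reserved page is 1 GiB):

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // reserve 8 huge OS pages, spread evenly over all detected NUMA nodes
  // (numa_nodes == 0), waiting at most 10 seconds in total
  int err = mi_reserve_huge_os_pages_interleave(8, 0, 10000 /* timeout in msec */);
  if (err != 0) {
    fprintf(stderr, "unable to reserve huge OS pages (error %d)\n", err);
  }
  return 0;
}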
From bc8eca8bf2641f12ecc23d7527aecdb62d6d2939 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Apr 2025 12:09:09 -0700 Subject: [PATCH 294/352] typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 70a25fc1..cee78898 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. From af21001f7a65eafb8fb16460b018ebf9d75e2ad8 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Apr 2025 12:17:45 -0700 Subject: [PATCH 295/352] clarify to use as the PR branch --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 5a495275..8b2ada1f 100644 --- a/readme.md +++ b/readme.md @@ -73,7 +73,7 @@ Enjoy! ### Branches * `master`: latest stable release (still based on `dev2`). -* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. +* `dev`: development branch for mimalloc v1. **Use this branch for submitting PR's**. * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. From 951538d469f72532e43a2437d556a73518f5dde4 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 8 Apr 2025 13:56:31 -0700 Subject: [PATCH 296/352] fix prctl.h includes for alpine linux/musl (hopefully fixes #1065, #1066, #1067) --- src/prim/unix/prim.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 09aa91b5..e3888e73 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -32,6 +32,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(__linux__) #include #include // THP disable, PR_SET_VMA + #if !defined(PR_SET_VMA) + #include + #endif #if defined(__GLIBC__) #include // linux mmap flags #else From 7543e8989abe41e87f260424b3711931d680da77 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 13 Apr 2025 19:49:47 -0700 Subject: [PATCH 297/352] validate pointer before assertion in mi_free_size (issue #754) --- src/free.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/free.c b/src/free.c index 7e529530..22284135 100644 --- a/src/free.c +++ b/src/free.c @@ -340,7 +340,11 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); + #if MI_DEBUG + mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free_size"); + if mi_unlikely(segment==NULL) return; mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + #endif mi_free(p); } From fae61ed946ceaf88f8e902aa596bb46305b531d6 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 13 Apr 2025 19:56:49 -0700 Subject: [PATCH 298/352] fix assertion in mi_free_size (issue #754) --- src/free.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/free.c b/src/free.c index 22284135..d0940a16 100644 --- a/src/free.c +++ b/src/free.c @@ -341,9 +341,8 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); #if MI_DEBUG - mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free_size"); - if mi_unlikely(segment==NULL) return; - mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + const size_t available = _mi_usable_size(p,"mi_free_size"); + mi_assert(p == NULL || size <= available || available == 0 /* invalid pointer */ ); #endif mi_free(p); } From aad0bc2ae3bc498b8e405d3f89be90c22abe76d8 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 2 May 2025 08:09:40 -0700 Subject: [PATCH 299/352] fix cast on msvc --- include/mimalloc/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 51fad09c..6283f1d1 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -855,7 +855,7 @@ static inline size_t mi_clz(size_t x) { #else _BitScanReverse64(&idx, x); #endif - return ((MI_SIZE_BITS - 1) - idx); + return ((MI_SIZE_BITS - 1) - (size_t)idx); } static inline size_t mi_ctz(size_t x) { if (x==0) return MI_SIZE_BITS; @@ -865,7 +865,7 @@ static inline size_t mi_ctz(size_t x) { #else _BitScanForward64(&idx, x); #endif - return idx; + return (size_t)idx; } #else From 6bfb1c656c6da4eaa8dab4d8fc0197f1ed946483 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 08:40:21 -0700 Subject: [PATCH 300/352] allow size==0 for mi_prim_free (issue #1041) --- src/os.c | 14 +++++++------- src/prim/emscripten/prim.c | 4 ++-- src/prim/unix/prim.c | 5 +++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/os.c b/src/os.c index 894e3a45..be7e532c 100644 --- a/src/os.c +++ b/src/os.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
@@ -167,8 +167,8 @@ static void mi_os_free_huge_os_pages(void* p, size_t size); static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { mi_assert_internal((size % _mi_os_page_size()) == 0); - if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) - int err = _mi_prim_free(addr, size); + if (addr == NULL) return; // || _mi_os_is_huge_reserved(addr) + int err = _mi_prim_free(addr, size); // allow size==0 (issue #1041) if (err != 0) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } @@ -186,10 +186,10 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me void* base = addr; // different base? (due to alignment) if (memid.mem.os.base != base) { - mi_assert(memid.mem.os.base <= addr); + mi_assert(memid.mem.os.base <= addr); base = memid.mem.os.base; const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base; - if (memid.mem.os.size==0) { + if (memid.mem.os.size==0) { csize += diff; } if (still_committed) { @@ -733,8 +733,8 @@ static int mi_os_numa_node_get(void) { } int _mi_os_numa_node(void) { - if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { - return 0; + if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { + return 0; } else { return mi_os_numa_node_get(); diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index 82147de7..a8677cbc 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen, Alon Zakai This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -58,7 +58,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config) { extern void emmalloc_free(void*); int _mi_prim_free(void* addr, size_t size) { - MI_UNUSED(size); + if (size==0) return 0; emmalloc_free(addr); return 0; } diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index e3888e73..f93e458a 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -70,7 +70,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MADV_FREE POSIX_MADV_FREE #endif - + //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. // and do allocation themselves; using syscalls prevents recursion when mimalloc is @@ -186,6 +186,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) //--------------------------------------------- int _mi_prim_free(void* addr, size_t size ) { + if (size==0) return 0; bool err = (munmap(addr, size) == -1); return (err ? 
errno : 0); } From 2c34250f43c0c06cdc3c405781eed90daf008361 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 08:55:16 -0700 Subject: [PATCH 301/352] extend override test on windows --- test/main-override-dep.cpp | 38 +++++++++++++++++++++++++++++++++++++- test/main-override.cpp | 5 +++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/test/main-override-dep.cpp b/test/main-override-dep.cpp index e92f6fc4..d89e3fca 100644 --- a/test/main-override-dep.cpp +++ b/test/main-override-dep.cpp @@ -1,6 +1,7 @@ // Issue #981: test overriding allocation in a DLL that is compiled independent of mimalloc. // This is imported by the `mimalloc-test-override` project. #include +#include #include "main-override-dep.h" std::string TestAllocInDll::GetString() @@ -10,6 +11,41 @@ std::string TestAllocInDll::GetString() const char* t = "test"; memcpy(test, t, 4); std::string r = test; + std::cout << "override-dep: GetString: " << r << "\n"; delete[] test; return r; -} \ No newline at end of file +} + + +class Static { +private: + void* p; +public: + Static() { + printf("override-dep: static constructor\n"); + p = malloc(64); + return; + } + ~Static() { + free(p); + printf("override-dep: static destructor\n"); + return; + } +}; + +static Static s = Static(); + + +#include + +BOOL WINAPI DllMain(HINSTANCE module, DWORD reason, LPVOID reserved) { + (void)(reserved); + (void)(module); + if (reason==DLL_PROCESS_ATTACH) { + printf("override-dep: dll attach\n"); + } + else if (reason==DLL_PROCESS_DETACH) { + printf("override-dep: dll detach\n"); + } + return TRUE; +} diff --git a/test/main-override.cpp b/test/main-override.cpp index db594acc..576f47bc 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -37,7 +37,7 @@ static void test_thread_local(); // issue #944 static void test_mixed1(); // issue #942 static void test_stl_allocators(); -#if x_WIN32 +#if _WIN32 #include "main-override-dep.h" static void test_dep(); // issue #981: test overriding in another DLL #else @@ -145,11 +145,12 @@ static bool test_stl_allocator1() { struct some_struct { int i; int j; double z; }; -#if x_WIN32 +#if _WIN32 static void test_dep() { TestAllocInDll t; std::string s = t.GetString(); + std::cout << "test_dep GetString: " << s << "\n"; } #endif From f3e92b1edd851a4d1e2e2c4bbada87f2855dc834 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 16:12:20 -0700 Subject: [PATCH 302/352] Use second user TLS slot to avoid using reserved fields in the TEB (issue #1078) --- include/mimalloc/prim.h | 15 ++++++++------- src/prim/windows/prim.c | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index bddd66e9..b0ddc2d0 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,19 +208,20 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. This uses a fixed location -// in the TCB though (last user-reserved slot by default) which may clash with other applications. - +// thread-local initialization checks in the fast path. +// We always use the second user TLS slot (the first one is always allocated already), +// and at initialization (`windows/prim.c`) we call TlsAlloc and verify +// we indeed get the second slot (and fail otherwise). 
+// Todo: we could make the Tls slot completely dynamic but that would require +// an extra read of the static Tls slot instead of using a constant offset. #define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) #if MI_WIN_USE_FIXED_TLS > 1 #define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) #elif MI_SIZE_SIZE == 4 -#define MI_TLS_SLOT (0x710) // Last user-reserved slot -// #define MI_TLS_SLOT (0xF0C) // Last TlsSlot (might clash with other app reserved slot) +#define MI_TLS_SLOT (0x0E18) // Second User TLS slot #else -#define MI_TLS_SLOT (0x888) // Last user-reserved slot -// #define MI_TLS_SLOT (0x1678) // Last TlsSlot (might clash with other app reserved slot) +#define MI_TLS_SLOT (0x1488) // Second User TLS slot #endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index a080f4bc..7daa09ef 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -631,8 +631,20 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); #if MI_TLS_SLOT >= 2 - if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + if (reason==DLL_PROCESS_ATTACH) { + const DWORD tls_slot = TlsAlloc(); + if (tls_slot != 1) { + _mi_error_message(EFAULT, "unable to allocate the second TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + } + } + if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { + if (mi_prim_get_default_heap() == NULL) { + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + } + #if MI_DEBUG + void* const p = TlsGetValue(1); + mi_assert_internal(p == (void*)&_mi_heap_empty); + #endif } #endif if (reason==DLL_PROCESS_ATTACH) { From 417e8176bddce86c2d53656c5552f5bb96304c46 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 16:27:00 -0700 Subject: [PATCH 303/352] add unix large page size constant and adjust aligment to the large page size for large allocations --- src/prim/unix/prim.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index f93e458a..f3ccb013 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -70,6 +70,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MADV_FREE POSIX_MADV_FREE #endif +#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this? //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. @@ -156,7 +157,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) } #endif } - config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? + config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE; config->has_overcommit = unix_detect_overcommit(); config->has_partial_free = true; // mmap can free in parts config->has_virtual_reserve = true; // todo: check if this true for NetBSD? 
(for anonymous mmap with PROT_NONE) @@ -386,6 +387,9 @@ int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool comm mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); + if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) { + try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations + } *is_zero = true; int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); @@ -433,7 +437,7 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = unix_madvise(start, size, MADV_DONTNEED); - #if !MI_DEBUG && !MI_SECURE + #if !MI_DEBUG && MI_SECURE<=2 *needs_recommit = false; #else *needs_recommit = true; From e4c5d09d65ff7743fe5e5dfadd6f082e839ff791 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:04:57 -0700 Subject: [PATCH 304/352] improve TLS access on Windows with msvc (by Frank Richter, issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index b0ddc2d0..a722d721 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. +// thread-local initialization checks in the fast path. // We always use the second user TLS slot (the first one is always allocated already), // and at initialization (`windows/prim.c`) we call TlsAlloc and verify // we indeed get the second slot (and fail otherwise). @@ -270,6 +270,9 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly +#ifdef _MSC_VER +__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) +#endif extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern bool _mi_process_is_initialized; // has mi_process_init been called? 
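Patch 302 above relies on the second user TLS slot living at a fixed offset in the TEB. A standalone sketch of that assumption (Windows x64, MSVC; it assumes the conventional TEB layout where the user TLS slot array starts at gs:[0x1480], so slot 1 is gs:[0x1488] -- the same constant the patch uses for MI_TLS_SLOT):

#include <windows.h>
#include <intrin.h>
#include <stdio.h>

int main(void) {
  // in a freshly started process the first TlsAlloc() normally returns slot 1;
  // this mirrors the check windows/prim.c performs at DLL_PROCESS_ATTACH
  DWORD slot = TlsAlloc();
  printf("allocated TLS slot: %lu\n", slot);
  if (slot != 1) return 1;                      // some other module grabbed it first; bail out
  TlsSetValue(slot, (LPVOID)0x1234);
  void* fast = (void*)__readgsqword(0x1488);    // direct TEB read, no API call
  printf("TlsGetValue: %p, fixed-offset read: %p\n", TlsGetValue(slot), fast);
  return 0;
}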
From f989a1cbb9b63043f1e56d248efe1ede9a6651d7 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:10:38 -0700 Subject: [PATCH 305/352] add more decl_hidden specifiers on extern variables to improve access on arm64 --- include/mimalloc/internal.h | 6 +++--- include/mimalloc/prim.h | 6 +++--- src/page.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 51fad09c..c9362aa0 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -96,7 +96,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; +extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); @@ -958,8 +958,8 @@ static inline size_t mi_popcount(size_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -extern bool _mi_cpu_has_fsrm; -extern bool _mi_cpu_has_erms; +extern mi_decl_hidden bool _mi_cpu_has_fsrm; +extern mi_decl_hidden bool _mi_cpu_has_erms; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index a722d721..527bb97a 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -273,8 +273,8 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #ifdef _MSC_VER __declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) #endif -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; @@ -402,7 +402,7 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) { #elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; +extern mi_decl_hidden pthread_key_t _mi_heap_default_key; static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); diff --git a/src/page.c b/src/page.c index 6a693e89..55150f33 100644 --- a/src/page.c +++ b/src/page.c @@ -114,7 +114,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? 
bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); @@ -979,9 +979,9 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // free delayed frees from other threads (but skip contended ones) _mi_heap_delayed_free_partial(heap); - + // collect every once in a while (10000 by default) - const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); + const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); if (heap->generic_collect_count >= generic_collect) { heap->generic_collect_count = 0; mi_heap_collect(heap, false /* force? */); From 9194362e4858bdd2eaf1b1cb9075abaa6ace2460 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:04:57 -0700 Subject: [PATCH 306/352] improve TLS access on Windows with msvc (by Frank Richter, issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index b0ddc2d0..a722d721 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. +// thread-local initialization checks in the fast path. // We always use the second user TLS slot (the first one is always allocated already), // and at initialization (`windows/prim.c`) we call TlsAlloc and verify // we indeed get the second slot (and fail otherwise). @@ -270,6 +270,9 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly +#ifdef _MSC_VER +__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) +#endif extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern bool _mi_process_is_initialized; // has mi_process_init been called? 
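A note on the mi_decl_hidden additions in the surrounding patches (an illustrative sketch, not part of the patch series): when mimalloc is built as a shared library with -fPIC, an extern variable with default visibility can be interposed at load time, so the compiler must first load its address from the GOT and only then load the value. Hidden visibility promises the definition lives in the same shared object, which lets arm64 (and other PC-relative targets) address it directly and drop one load on hot paths such as the _mi_cpu_has_fsrm check in _mi_memcpy. A minimal sketch with made-up variable names; compile with gcc -O2 -fPIC -c on arm64 and compare the two functions:

#include <stdbool.h>

extern bool flag_default;                         // default visibility: the address is fetched from
                                                  //   the GOT first (roughly adrp+ldr of the GOT entry,
bool read_default(void) { return flag_default; }  //   then ldrb of the value)

extern __attribute__((visibility("hidden"))) bool flag_hidden;  // hidden: cannot be interposed, so the
                                                                //   value is loaded PC-relative in one
bool read_hidden(void)  { return flag_hidden; }                 //   step (roughly adrp + ldrb)
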
From 9c24c428cb06c735ccc3dcca996c2d09bb139d08 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:10:38 -0700 Subject: [PATCH 307/352] add more decl_hidden specifiers on extern variables to improve access on arm64 --- include/mimalloc/internal.h | 6 +++--- include/mimalloc/prim.h | 6 +++--- src/page.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 6283f1d1..b11bd357 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -96,7 +96,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; +extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); @@ -958,8 +958,8 @@ static inline size_t mi_popcount(size_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -extern bool _mi_cpu_has_fsrm; -extern bool _mi_cpu_has_erms; +extern mi_decl_hidden bool _mi_cpu_has_fsrm; +extern mi_decl_hidden bool _mi_cpu_has_erms; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index a722d721..527bb97a 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -273,8 +273,8 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #ifdef _MSC_VER __declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) #endif -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; @@ -402,7 +402,7 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) { #elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; +extern mi_decl_hidden pthread_key_t _mi_heap_default_key; static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); diff --git a/src/page.c b/src/page.c index 6a693e89..55150f33 100644 --- a/src/page.c +++ b/src/page.c @@ -114,7 +114,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? 
bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); @@ -979,9 +979,9 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // free delayed frees from other threads (but skip contended ones) _mi_heap_delayed_free_partial(heap); - + // collect every once in a while (10000 by default) - const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); + const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); if (heap->generic_collect_count >= generic_collect) { heap->generic_collect_count = 0; mi_heap_collect(heap, false /* force? */); From 63b0989df57a9dd2b867920307b0d038df695a54 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 4 May 2025 21:41:26 -0700 Subject: [PATCH 308/352] revert use of selectany for msvc (issue #1078) --- include/mimalloc/prim.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 527bb97a..2d508148 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -270,9 +270,6 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly -#ifdef _MSC_VER -__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) -#endif extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? From 52b75693c48308e8b19b94ffa7fbc0580021ba87 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 22:03:10 -0700 Subject: [PATCH 309/352] use TlsAlloc with a dynamic offset for MI_WIN_USE_FIXED_TLS by default (issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 13 ++++++------- src/prim/windows/prim.c | 15 ++++++++++----- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index 128a4ff6..d6af71ce 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 2d508148..60af4d59 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -209,19 +209,18 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // On windows we can store the thread-local heap at a fixed TLS slot to avoid // thread-local initialization checks in the fast path. -// We always use the second user TLS slot (the first one is always allocated already), -// and at initialization (`windows/prim.c`) we call TlsAlloc and verify -// we indeed get the second slot (and fail otherwise). -// Todo: we could make the Tls slot completely dynamic but that would require -// an extra read of the static Tls slot instead of using a constant offset. +// We allocate a user TLS slot at process initialization (see `windows/prim.c`) +// and store the offset `_mi_win_tls_offset`. 
#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) +extern mi_decl_hidden size_t _mi_win_tls_offset; + #if MI_WIN_USE_FIXED_TLS > 1 #define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) #elif MI_SIZE_SIZE == 4 -#define MI_TLS_SLOT (0x0E18) // Second User TLS slot +#define MI_TLS_SLOT (0x0E10 + _mi_win_tls_offset) // User TLS slots #else -#define MI_TLS_SLOT (0x1488) // Second User TLS slot +#define MI_TLS_SLOT (0x1480 + _mi_win_tls_offset) // User TLS slots #endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 7daa09ef..c91102a2 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -627,22 +627,27 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Process & Thread Init/Done //---------------------------------------------------------------- +#if MI_HAS_TLS_SLOT +mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default +#endif + static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); - #if MI_TLS_SLOT >= 2 + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if (reason==DLL_PROCESS_ATTACH) { const DWORD tls_slot = TlsAlloc(); - if (tls_slot != 1) { - _mi_error_message(EFAULT, "unable to allocate the second TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); } + _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } #if MI_DEBUG - void* const p = TlsGetValue(1); + void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } @@ -808,7 +813,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain - #if MI_TLS_SLOT >= 2 + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } From 303b196d403876f324e7456854a148e85682c2d9 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 22:09:56 -0700 Subject: [PATCH 310/352] fix MI_WIN_USE_FIXED_TLS conditions --- src/prim/windows/prim.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index c91102a2..d0fee4c2 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -627,7 +627,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Process & Thread Init/Done //---------------------------------------------------------------- -#if MI_HAS_TLS_SLOT +#if MI_WIN_USE_FIXED_TLS==1 mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default #endif @@ -635,6 +635,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation + #if MI_WIN_USE_FIXED_TLS==1 if (reason==DLL_PROCESS_ATTACH) { const DWORD tls_slot 
= TlsAlloc(); if (tls_slot == TLS_OUT_OF_INDEXES) { @@ -642,11 +643,12 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { } _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } + #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } - #if MI_DEBUG + #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); #endif From e2d7c24c7362a19429f7338f0e5ed493f7c1d7b0 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 4 May 2025 22:17:59 -0700 Subject: [PATCH 311/352] add fixed TLS slot test to pipeline on Windows --- azure-pipelines.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 25d4a6e0..b7fc59d4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -42,6 +42,14 @@ jobs: BuildType: release cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 MSBuildConfiguration: Release + Debug Fixed TLS: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Debug + Release Fixed TLS: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Release steps: - task: CMake@1 inputs: From c84d996e884412b1fa58fa48ee6fc6e2fa841446 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 5 May 2025 10:23:52 -0700 Subject: [PATCH 312/352] fix TLS initialization for MI_WIN_USE_FIXED_TLS with redirection --- include/mimalloc/prim.h | 2 +- src/prim/windows/prim.c | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 60af4d59..d3157949 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -119,7 +119,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); - +#define MI_WIN_USE_FIXED_TLS 1 //------------------------------------------------------------------- // Access to TLS (thread local storage) slots. 
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index d0fee4c2..535d34a6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -628,18 +628,16 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { //---------------------------------------------------------------- #if MI_WIN_USE_FIXED_TLS==1 -mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default +mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif -static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { - MI_UNUSED(reserved); - MI_UNUSED(module); +static void mi_win_tls_init(DWORD reason) { #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation #if MI_WIN_USE_FIXED_TLS==1 - if (reason==DLL_PROCESS_ATTACH) { - const DWORD tls_slot = TlsAlloc(); - if (tls_slot == TLS_OUT_OF_INDEXES) { - _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + if (reason==DLL_PROCESS_ATTACH && _mi_win_tls_offset == 0) { + const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); } _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } @@ -653,7 +651,15 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } + #else + MI_UNUSED(reason); #endif +} + +static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { + MI_UNUSED(reserved); + MI_UNUSED(module); + mi_win_tls_init(reason); if (reason==DLL_PROCESS_ATTACH) { _mi_process_load(); } @@ -815,11 +821,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain - #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation - if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); - } - #endif + mi_win_tls_init(reason); if (reason == DLL_PROCESS_ATTACH) { mi_redirected = true; } From 0184a86eaf4cf0018d544e5992b86f5ede688601 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:32:11 -0700 Subject: [PATCH 313/352] add alpine x86 docker file --- contrib/docker/alpine-arm32v7/Dockerfile | 2 +- contrib/docker/alpine-x86/Dockerfile | 28 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 contrib/docker/alpine-x86/Dockerfile diff --git a/contrib/docker/alpine-arm32v7/Dockerfile b/contrib/docker/alpine-arm32v7/Dockerfile index f74934fb..daa60f50 100644 --- a/contrib/docker/alpine-arm32v7/Dockerfile +++ b/contrib/docker/alpine-arm32v7/Dockerfile @@ -1,6 +1,6 @@ # install from an image # download first an appropriate tar.gz image into the current directory -# from: +# from FROM scratch # Substitute the image name that was downloaded diff --git a/contrib/docker/alpine-x86/Dockerfile b/contrib/docker/alpine-x86/Dockerfile new file mode 100644 index 00000000..a0f76c17 --- /dev/null +++ b/contrib/docker/alpine-x86/Dockerfile @@ -0,0 +1,28 @@ +# install from an image +# download first an appropriate tar.gz image into the current directory +# from +FROM scratch + +# Substitute the image name that was downloaded +ADD alpine-minirootfs-20250108-x86.tar.gz / + +# Install tools +RUN apk 
add build-base make cmake +RUN apk add git +RUN apk add vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev2 +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. -DMI_DEBUG_FULL=ON +# RUN make -j +# RUN make test + +CMD ["/bin/sh"] From 341149391fee496790a7fa916b1fd3fdd0cce1a1 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:33:29 -0700 Subject: [PATCH 314/352] fix include of prctl.h on alpine linux x86 --- src/prim/unix/prim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index f3ccb013..a90fa659 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -32,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__linux__) #include #include // THP disable, PR_SET_VMA - #if !defined(PR_SET_VMA) + #if defined(__GLIBC__) && !defined(PR_SET_VMA) #include #endif #if defined(__GLIBC__) From a6ecb5c299e65eb7dd6602b97235126acc01a868 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:35:29 -0700 Subject: [PATCH 315/352] fix format specifier (for alpine linux x86, issue #1086) --- src/arena.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/arena.c b/src/arena.c index bdae8da1..aa01ffcb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -44,7 +44,7 @@ typedef struct mi_arena_s { mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited _Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be purged from `blocks_purge`. - + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) @@ -365,7 +365,7 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t *arena_id) { if (_mi_preloading()) return false; // use OS only while pre loading - + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); if (arena_count > (MI_MAX_ARENAS - 4)) return false; @@ -407,7 +407,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (!mi_option_is_enabled(mi_option_disallow_arena_alloc)) { // is arena allocation allowed? - if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid); if (p != NULL) return p; @@ -487,7 +487,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks) // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory). mi_assert_internal(already_committed < blocks); mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? 
*/, mi_arena_block_size(already_committed)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed)); } // clear the purged blocks @@ -556,7 +556,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) { // check pre-conditions if (arena->memid.is_pinned) return false; - + // expired yet? mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); if (!force && (expire == 0 || expire > now)) return false; @@ -611,7 +611,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) return any_purged; } -static void mi_arenas_try_purge( bool force, bool visit_all ) +static void mi_arenas_try_purge( bool force, bool visit_all ) { if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled @@ -628,7 +628,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all ) mi_atomic_guard(&purge_guard) { // increase global expire: at most one purge per delay cycle - mi_atomic_storei64_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay()); + mi_atomic_storei64_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay()); size_t max_purge_count = (visit_all ? max_arena : 2); bool all_visited = true; for (size_t i = 0; i < max_arena; i++) { @@ -947,7 +947,7 @@ void mi_debug_show_arenas(void) mi_attr_noexcept { for (size_t i = 0; i < max_arenas; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena == NULL) break; - _mi_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, MI_ARENA_BLOCK_SIZE / MI_MiB, arena->field_count, (arena->memid.is_pinned ? ", pinned" : "")); + _mi_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, (size_t)(MI_ARENA_BLOCK_SIZE / MI_MiB), arena->field_count, (arena->memid.is_pinned ? ", pinned" : "")); if (show_inuse) { inuse_total += mi_debug_show_bitmap(" ", "inuse blocks", arena->block_count, arena->blocks_inuse, arena->field_count); } From 72f05e2f076b3e1b160b8aaca7bc220a2532ced0 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:58:45 -0700 Subject: [PATCH 316/352] fix guarded sample rate of 1 (issue #1085) --- include/mimalloc/types.h | 1 - src/init.c | 17 ++++++++--------- test/main-override-static.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ab697f23..e2b5d318 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -512,7 +512,6 @@ struct mi_heap_s { size_t guarded_size_min; // minimal size for guarded objects size_t guarded_size_max; // maximal size for guarded objects size_t guarded_sample_rate; // sample rate (set to 0 to disable guarded pages) - size_t guarded_sample_seed; // starting sample count size_t guarded_sample_count; // current sample count (counting down to 0) #endif mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. 
diff --git a/src/init.c b/src/init.c index 8a48ae5e..fe0acd8a 100644 --- a/src/init.c +++ b/src/init.c @@ -110,7 +110,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { false, // can reclaim 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) + 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY @@ -153,7 +153,7 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = { false, // can reclaim 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 0, + 0, 0, 0, 0, #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY @@ -165,15 +165,14 @@ mi_stats_t _mi_stats_main = { MI_STAT_VERSION, MI_STATS_NULL }; #if MI_GUARDED mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) { - heap->guarded_sample_seed = seed; - if (heap->guarded_sample_seed == 0) { - heap->guarded_sample_seed = _mi_heap_random_next(heap); - } heap->guarded_sample_rate = sample_rate; - if (heap->guarded_sample_rate >= 1) { - heap->guarded_sample_seed = heap->guarded_sample_seed % heap->guarded_sample_rate; + heap->guarded_sample_count = sample_rate; // count down samples + if (heap->guarded_sample_rate > 1) { + if (seed == 0) { + seed = _mi_heap_random_next(heap); + } + heap->guarded_sample_count = (seed % heap->guarded_sample_rate) + 1; // start at random count between 1 and `sample_rate` } - heap->guarded_sample_count = heap->guarded_sample_seed; // count down samples } mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) { diff --git a/test/main-override-static.c b/test/main-override-static.c index 06d7baa5..c94b98f4 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -43,7 +43,7 @@ int main() { // corrupt_free(); // block_overflow1(); // block_overflow2(); - // test_canary_leak(); + test_canary_leak(); // test_aslr(); // invalid_free(); // test_reserved(); From 0ae310327f83abd3b354bf03b819f3595be0daf2 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 16:22:08 -0700 Subject: [PATCH 317/352] fix debug assertion for windows TLS --- src/prim/windows/prim.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 535d34a6..6ab715e6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -644,12 +644,12 @@ static void mi_win_tls_init(DWORD reason) { #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 + void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); + mi_assert_internal(p == (void*)&_mi_heap_empty); + #endif } - #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 - void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); - mi_assert_internal(p == (void*)&_mi_heap_empty); - #endif } #else MI_UNUSED(reason); From df3e1916209b3783bb3d001013ce8fbba4815da6 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 16:38:53 -0700 Subject: [PATCH 318/352] make windows fixed TLS opt-in --- include/mimalloc/prim.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index d3157949..fbf0cc74 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -118,9 +118,6 @@ void 
_mi_prim_thread_done_auto_done(void); void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); - -#define MI_WIN_USE_FIXED_TLS 1 - //------------------------------------------------------------------- // Access to TLS (thread local storage) slots. // We need fast access to both a unique thread id (in `free.c:mi_free`) and From 41cc1bfe5199fbfc4dc5e7c7ecb1453ad4e8ad7b Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 17:38:16 -0700 Subject: [PATCH 319/352] add guarded TLS test for Windows fixed TLS --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 +-- include/mimalloc/prim.h | 2 +- src/init.c | 1 - src/prim/windows/prim.c | 33 ++++++++++++++----------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index fbf0cc74..3d8f1806 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // thread-local initialization checks in the fast path. // We allocate a user TLS slot at process initialization (see `windows/prim.c`) // and store the offset `_mi_win_tls_offset`. -#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) +#define MI_HAS_TLS_SLOT 1 // 2 = we can reliably initialize the slot (saving a test on each malloc) extern mi_decl_hidden size_t _mi_win_tls_offset; diff --git a/src/init.c b/src/init.c index fe0acd8a..ff6c5d29 100644 --- a/src/init.c +++ b/src/init.c @@ -225,7 +225,6 @@ mi_heap_t* _mi_heap_main_get(void) { return &_mi_heap_main; } - /* ----------------------------------------------------------- Sub process ----------------------------------------------------------- */ diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 6ab715e6..9ffacaa3 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -631,18 +631,23 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif +//static void mi_debug_out(const char* s) { +// HANDLE h = GetStdHandle(STD_ERROR_HANDLE); +// WriteConsole(h, s, (DWORD)_mi_strlen(s), NULL, NULL); +//} + static void mi_win_tls_init(DWORD reason) { - #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation - #if MI_WIN_USE_FIXED_TLS==1 - if (reason==DLL_PROCESS_ATTACH && _mi_win_tls_offset == 0) { - const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 - if (tls_slot == TLS_OUT_OF_INDEXES) { - _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); - } - _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); - } - #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { + #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically + if (_mi_win_tls_offset == 0 && reason=DLL_PROCESS_ATTACH) { + const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + } + _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); + } + #endif + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS 
slot before any allocation if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 @@ -650,10 +655,8 @@ static void mi_win_tls_init(DWORD reason) { mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } - } - #else - MI_UNUSED(reason); - #endif + #endif + } } static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { @@ -676,7 +679,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #define MI_PRIM_HAS_PROCESS_ATTACH 1 // Windows DLL: easy to hook into process_init and thread_done - BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { mi_win_main((PVOID)inst,reason,reserved); return TRUE; } From 15c917ef1522957a260686cf027a9f294ba1c5cd Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 17:45:10 -0700 Subject: [PATCH 320/352] fix syntax error --- src/prim/windows/prim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 9ffacaa3..b82918c1 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -639,7 +639,7 @@ mi_decl_cache_align size_t _mi_win_tls_offset = 0; static void mi_win_tls_init(DWORD reason) { if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically - if (_mi_win_tls_offset == 0 && reason=DLL_PROCESS_ATTACH) { + if (_mi_win_tls_offset == 0 && reason == DLL_PROCESS_ATTACH) { const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 if (tls_slot == TLS_OUT_OF_INDEXES) { _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); From 08c33768a5344e43a1ba95b88c3adcbb6a5c3498 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 11:09:34 -0700 Subject: [PATCH 321/352] fix stats for pages and page_bins --- include/mimalloc/internal.h | 2 ++ src/heap.c | 4 +--- src/page-queue.c | 4 ++-- src/page.c | 5 ++--- src/segment.c | 1 + src/stats.c | 6 +++++- test/test-stress.c | 1 + 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b11bd357..e277f0ff 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -217,6 +217,7 @@ void _mi_deferred_free(mi_heap_t* heap, bool force); void _mi_page_free_collect(mi_page_t* page,bool force); void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments +size_t _mi_page_bin(const mi_page_t* page); // for stats size_t _mi_bin_size(size_t bin); // for stats size_t _mi_bin(size_t size); // for stats @@ -233,6 +234,7 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* pa // "stats.c" void _mi_stats_done(mi_stats_t* stats); +void _mi_stats_merge_thread(mi_tld_t* tld); mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/src/heap.c b/src/heap.c index 0ea9a2ff..118438b9 100644 --- a/src/heap.c +++ b/src/heap.c @@ -169,9 +169,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_arenas_collect(collect == MI_FORCE /* force purge? 
*/); // merge statistics - if (collect <= MI_FORCE) { - mi_stats_merge(); - } + if (collect <= MI_FORCE) { _mi_stats_merge_thread(heap->tld); } } void _mi_heap_collect_abandon(mi_heap_t* heap) { diff --git a/src/page-queue.c b/src/page-queue.c index 3507505d..38b9aff4 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -136,7 +136,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* } #endif -static size_t mi_page_bin(const mi_page_t* page) { +size_t _mi_page_bin(const mi_page_t* page) { const size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); mi_assert_internal(bin <= MI_BIN_FULL); return bin; @@ -144,7 +144,7 @@ static size_t mi_page_bin(const mi_page_t* page) { static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { mi_assert_internal(heap!=NULL); - const size_t bin = mi_page_bin(page); + const size_t bin = _mi_page_bin(page); mi_page_queue_t* pq = &heap->pages[bin]; mi_assert_internal((mi_page_block_size(page) == pq->block_size) || (mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) || diff --git a/src/page.c b/src/page.c index 55150f33..89acb409 100644 --- a/src/page.c +++ b/src/page.c @@ -290,7 +290,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_assert_internal(full_block_size >= block_size); mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); - mi_heap_stat_increase(heap, page_bins[mi_page_bin(page)], 1); + mi_heap_stat_increase(heap, page_bins[_mi_page_bin(page)], 1); if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); return page; @@ -443,8 +443,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_segments_tld_t* segments_tld = &heap->tld->segments; mi_page_queue_remove(pq, page); - // and free it - mi_heap_stat_decrease(heap, page_bins[mi_page_bin(page)], 1); + // and free it mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } diff --git a/src/segment.c b/src/segment.c index 75f8dacb..708ddd00 100644 --- a/src/segment.c +++ b/src/segment.c @@ -718,6 +718,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg size_t inuse = page->capacity * mi_page_block_size(page); _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_bins[_mi_page_bin(page)], 1); page->is_zero_init = false; page->segment_in_use = false; diff --git a/src/stats.c b/src/stats.c index 92bc049c..ec8b65a3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -395,6 +395,10 @@ void mi_stats_merge(void) mi_attr_noexcept { mi_stats_merge_from( mi_stats_get_default() ); } +void _mi_stats_merge_thread(mi_tld_t* tld) { + mi_stats_merge_from( &tld->stats ); +} + void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` mi_stats_merge_from(stats); } @@ -498,7 +502,7 @@ static bool mi_heap_buf_expand(mi_heap_buf_t* hbuf) { hbuf->buf[hbuf->size-1] = 0; } if (hbuf->size > SIZE_MAX/2 || !hbuf->can_realloc) return false; - const size_t newsize = (hbuf->size == 0 ? 2*MI_KiB : 2*hbuf->size); + const size_t newsize = (hbuf->size == 0 ? 
mi_good_size(12*MI_KiB) : 2*hbuf->size); char* const newbuf = (char*)mi_rezalloc(hbuf->buf, newsize); if (newbuf == NULL) return false; hbuf->buf = newbuf; diff --git a/test/test-stress.c b/test/test-stress.c index 9e041064..1abe56d2 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -337,6 +337,7 @@ int main(int argc, char** argv) { mi_free(json); } #endif + mi_collect(true); mi_stats_print(NULL); #endif //bench_end_program(); From 6cb4861f3eb4757ad4d1f1b0ef6aca793244381e Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 17:36:31 -0700 Subject: [PATCH 322/352] fix format specifier for numa nodes --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index ec8b65a3..89d0e103 100644 --- a/src/stats.c +++ b/src/stats.c @@ -348,7 +348,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->malloc_guarded_count, "guarded", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->page_searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); + _mi_fprintf(out, arg, "%10s: %5i\n", "numa nodes", _mi_os_numa_node_count()); size_t elapsed; size_t user_time; From 44e370bdaaaf2ba9d062113ba3902624a29eaa25 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 19:20:31 -0700 Subject: [PATCH 323/352] fix format specifier in stat output --- src/stats.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index 89d0e103..03eafb66 100644 --- a/src/stats.c +++ b/src/stats.c @@ -359,9 +359,9 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) size_t peak_commit; size_t page_faults; mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", - user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); + _mi_fprintf(out, arg, "%10s: %5zu.%03zu s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, arg, "%10s: user: %zu.%03zu s, system: %zu.%03zu s, faults: %zu, rss: ", "process", + user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { _mi_fprintf(out, arg, ", commit: "); From 3e32b4c38563b6d037d8289cc0a73ea694f88ff1 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 28 May 2025 08:37:34 -0700 Subject: [PATCH 324/352] fix OS allocation size tracking in the memid --- include/mimalloc/internal.h | 4 +++- src/os.c | 11 +++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e277f0ff..2e770943 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -784,8 +784,10 @@ static inline mi_memid_t _mi_memid_none(void) { return _mi_memid_create(MI_MEM_NONE); } -static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { +static inline mi_memid_t _mi_memid_create_os(void* base, size_t size, bool committed, bool is_zero, bool is_large) { mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.mem.os.base = base; + memid.mem.os.size = size; memid.initially_committed = committed; memid.initially_zero = is_zero; 
memid.is_pinned = is_large; diff --git a/src/os.c b/src/os.c index be7e532c..d134feba 100644 --- a/src/os.c +++ b/src/os.c @@ -339,7 +339,7 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) { bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); if (p != NULL) { - *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); + *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); } return p; } @@ -357,10 +357,9 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base ); if (p != NULL) { - *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); memid->mem.os.base = os_base; - // memid->mem.os.alignment = alignment; - memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? } return p; } @@ -618,7 +617,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; - uint8_t* start = mi_os_claim_huge_pages(pages, &size); + uint8_t* const start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously @@ -674,7 +673,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } if (page != 0) { mi_assert(start != NULL); - *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); + *memid = _mi_memid_create_os(start, size, true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; mi_assert(memid->is_pinned); #ifdef MI_TRACK_ASAN From b2637835826108d6aeeea295119971e5d542b0d7 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 29 May 2025 12:12:02 -0700 Subject: [PATCH 325/352] update readme --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 8b2ada1f..fd97c28c 100644 --- a/readme.md +++ b/readme.md @@ -85,7 +85,7 @@ Enjoy! ### Releases * 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: - fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, + fix arm32 pre-v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`. 
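A worked example for the memid bookkeeping introduced in the "fix OS allocation size tracking" patch above (illustrative only; the addresses are made up): an aligned OS allocation may over-allocate, return an aligned interior pointer, and record the original base and total length in the memid so that the eventual free releases the whole mapping:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uintptr_t os_base   = 0x12347000;       // what the OS happened to return (not aligned)
  const size_t    alignment = 0x10000;          // 64 KiB requested alignment
  const size_t    size      = 4u << 20;         // 4 MiB requested size
  const uintptr_t p = (os_base + alignment - 1) & ~(uintptr_t)(alignment - 1);  // aligned user pointer
  const size_t    os_size   = size + (size_t)(p - os_base);  // what memid.mem.os.size records
  // _mi_os_free_ex must release [os_base, os_base+os_size), not [p, p+size),
  // otherwise the leading (p - os_base) bytes stay mapped and the OS is given a wrong length.
  printf("adjust = 0x%zx, recorded size = 0x%zx\n", (size_t)(p - os_base), os_size);  // 0x9000, 0x409000
  return 0;
}

The later "fix missing csize assignment in _mi_os_free_ex" patch in this series covers the remaining case where this recorded size is zero and the good-alloc-size fallback was computed but never assigned to csize.
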
From 715acc03294d36c08c8f9f0dc0cbb2a87c320f2b Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 30 May 2025 09:29:35 -0700 Subject: [PATCH 326/352] mark assert_fail as cold and noreturn; move assert to internal.h (see issue #1091, and python/cpython#134586) --- include/mimalloc/internal.h | 94 +++++++++++++++++++++++++++---------- include/mimalloc/types.h | 21 --------- src/options.c | 2 +- 3 files changed, 69 insertions(+), 48 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2e770943..ddd0ba4f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -8,7 +8,6 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H - // -------------------------------------------------------------------------- // This file contains the internal API's of mimalloc and various utility // functions and macros. @@ -17,6 +16,11 @@ terms of the MIT license. A copy of the license can be found in the file #include "types.h" #include "track.h" + +// -------------------------------------------------------------------------- +// Compiler defines +// -------------------------------------------------------------------------- + #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else @@ -30,37 +34,69 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#define mi_decl_noreturn __declspec(noreturn) #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_noreturn __attribute__((noreturn)) #define mi_decl_weak __attribute__((weak)) #define mi_decl_hidden __attribute__((visibility("hidden"))) +#if (__GNUC__ >= 4) || defined(__clang__) +#define mi_decl_cold __attribute__((cold)) +#else +#define mi_decl_cold +#endif #elif __cplusplus >= 201103L // c++11 #define mi_decl_noinline #define mi_decl_thread thread_local #define mi_decl_cache_align alignas(MI_CACHE_LINE) +#define mi_decl_noreturn [[noreturn]] #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align +#define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc #endif #if defined(__EMSCRIPTEN__) && !defined(__wasi__) #define __wasi__ #endif -#if defined(__cplusplus) -#define mi_decl_externc extern "C" -#else -#define mi_decl_externc -#endif + +// -------------------------------------------------------------------------- +// Internal functions +// 
-------------------------------------------------------------------------- // "libc.c" #include @@ -256,26 +292,6 @@ bool _mi_page_is_valid(mi_page_t* page); #endif -// ------------------------------------------------------ -// Branches -// ------------------------------------------------------ - -#if defined(__GNUC__) || defined(__clang__) -#define mi_unlikely(x) (__builtin_expect(!!(x),false)) -#define mi_likely(x) (__builtin_expect(!!(x),true)) -#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) -#define mi_unlikely(x) (x) [[unlikely]] -#define mi_likely(x) (x) [[likely]] -#else -#define mi_unlikely(x) (x) -#define mi_likely(x) (x) -#endif - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - - /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. @@ -300,6 +316,32 @@ bool _mi_page_is_valid(mi_page_t* page); #endif +// ------------------------------------------------------ +// Assertions +// ------------------------------------------------------ + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func) mi_attr_noexcept; +#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + + + /* ----------------------------------------------------------- Inlined definitions ----------------------------------------------------------- */ diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index e2b5d318..855374e5 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -574,7 +574,6 @@ struct mi_tld_s { }; - // ------------------------------------------------------ // Debug // ------------------------------------------------------ @@ -589,26 +588,6 @@ struct mi_tld_s { #define MI_DEBUG_PADDING (0xDE) #endif -#if (MI_DEBUG) -// use our own assertion to print without memory allocation -void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); -#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) -#else -#define mi_assert(x) -#endif - -#if (MI_DEBUG>1) -#define mi_assert_internal mi_assert -#else -#define mi_assert_internal(x) -#endif - -#if (MI_DEBUG>2) -#define mi_assert_expensive mi_assert -#else -#define mi_assert_expensive(x) -#endif - // ------------------------------------------------------ // Statistics diff --git a/src/options.c b/src/options.c index 772dfe66..9bb5d1b3 100644 --- a/src/options.c +++ b/src/options.c @@ -525,7 +525,7 @@ void _mi_warning_message(const char* fmt, ...) 
{ #if MI_DEBUG -void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) mi_attr_noexcept { _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } From e19c0222380a8d53b7d408657dfba0c03d99133c Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 30 May 2025 09:36:38 -0700 Subject: [PATCH 327/352] define mi_decl_align separate from mi_decl_cache_align --- include/mimalloc/internal.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index ddd0ba4f..e951b576 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -27,13 +27,14 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_trace_message(...) #endif -#define MI_CACHE_LINE 64 +#define mi_decl_cache_align mi_decl_align(64) + #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) -#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#define mi_decl_align(a) __declspec(align(a)) #define mi_decl_noreturn __declspec(noreturn) #define mi_decl_weak #define mi_decl_hidden @@ -41,7 +42,7 @@ terms of the MIT license. A copy of the license can be found in the file #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread -#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_align(a) __attribute__((aligned(a))) #define mi_decl_noreturn __attribute__((noreturn)) #define mi_decl_weak __attribute__((weak)) #define mi_decl_hidden __attribute__((visibility("hidden"))) @@ -53,7 +54,7 @@ terms of the MIT license. A copy of the license can be found in the file #elif __cplusplus >= 201103L // c++11 #define mi_decl_noinline #define mi_decl_thread thread_local -#define mi_decl_cache_align alignas(MI_CACHE_LINE) +#define mi_decl_align(a) alignas(a) #define mi_decl_noreturn [[noreturn]] #define mi_decl_weak #define mi_decl_hidden @@ -61,7 +62,7 @@ terms of the MIT license. A copy of the license can be found in the file #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) -#define mi_decl_cache_align +#define mi_decl_align(a) #define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden From 2d16d2ca84a3dd4cca8e32fdfa3819bfa2a684a4 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 30 May 2025 10:08:28 -0700 Subject: [PATCH 328/352] update azure pipeline trigger for the main branch (instead of master) --- azure-pipelines.yml | 6 ++---- readme.md | 7 +++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index df608b17..d761d8c8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,10 +6,8 @@ trigger: branches: include: - - master - - dev3 - - dev2 - - dev + - main + - dev* tags: include: - v* diff --git a/readme.md b/readme.md index fd97c28c..76e2711e 100644 --- a/readme.md +++ b/readme.md @@ -72,15 +72,14 @@ Enjoy! ### Branches -* `master`: latest stable release (still based on `dev2`). 
+* `main`: latest stable release (still based on `dev2`). * `dev`: development branch for mimalloc v1. **Use this branch for submitting PR's**. * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. -* `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version +* `dev3`: development branch for mimalloc v3-beta. This branch is also downstream of `dev`. This version simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use less memory - with less fragmentation. + This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. ### Releases From 1c514847996fba21af276b96d0d748b1ce8f3772 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 11:36:07 -0700 Subject: [PATCH 329/352] fix missing csize assignment in _mi_os_free_ex --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d134feba..dee263bc 100644 --- a/src/os.c +++ b/src/os.c @@ -181,7 +181,7 @@ static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; - if (csize==0) { _mi_os_good_alloc_size(size); } + if (csize==0) { csize = _mi_os_good_alloc_size(size); } size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? (due to alignment) From 60f7e6a6a846dcf9ebe2b29530f0b309d4fd1dd2 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 12:20:02 -0700 Subject: [PATCH 330/352] check all os_commit calls and return NULL on failure --- include/mimalloc/internal.h | 6 +++--- src/os.c | 5 ++++- src/page.c | 26 +++++++++++++++++--------- src/segment.c | 4 +++- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2e770943..e1052787 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -126,13 +126,13 @@ bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero); -bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); -bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); +mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); +mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/src/os.c b/src/os.c index dee263bc..c720c8ad 100644 --- a/src/os.c +++ b/src/os.c @@ -300,7 +300,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only 
the aligned part if (commit) { - _mi_os_commit(p, size, NULL); + if (!_mi_os_commit(p, size, NULL)) { + mi_os_prim_free(p, over_size, 0); + return NULL; + } } } else { // mmap can free inside an allocation diff --git a/src/page.c b/src/page.c index 89acb409..b34ee414 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -630,14 +630,14 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - if (page->free != NULL) return; + if (page->free != NULL) return true; #endif - if (page->capacity >= page->reserved) return; + if (page->capacity >= page->reserved) return true; size_t page_size; //uint8_t* page_start = @@ -673,6 +673,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); + return true; } // Initialize a fresh page @@ -724,8 +725,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,tld); - mi_assert(mi_page_immediate_available(page)); + if (mi_page_extend_free(heap,page,tld)) { + mi_assert(mi_page_immediate_available(page)); + } + return; } @@ -817,9 +820,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page_candidate != NULL) { page = page_candidate; } - if (page != NULL && !mi_page_immediate_available(page)) { - mi_assert_internal(mi_page_is_expandable(page)); - mi_page_extend_free(heap, page, heap->tld); + if (page != NULL) { + if (!mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + if (!mi_page_extend_free(heap, page, heap->tld)) { + page = NULL; // failed to extend + } + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); } if (page == NULL) { diff --git a/src/segment.c b/src/segment.c index 708ddd00..1813a1fc 100644 --- a/src/segment.c +++ b/src/segment.c @@ -182,7 +182,9 @@ static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) static void mi_segment_protect_range(void* p, size_t size, bool protect) { if (protect) { - _mi_os_protect(p, size); + if (!_mi_os_protect(p, size)) { + _mi_error_message(EFAULT,"unable to protect segment memory at %p\n", p); + } } else { _mi_os_unprotect(p, size); From a0072ba7c335e38bcd20f854692930fcb1d00d5a Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 15:07:15 -0700 Subject: [PATCH 331/352] fix base address if commit fails on aligned overallocation --- src/os.c | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index c720c8ad..580b8af0 100644 --- a/src/os.c +++ b/src/os.c @@ -301,7 +301,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { if (!_mi_os_commit(p, size, NULL)) { - mi_os_prim_free(p, over_size, 0); + mi_os_prim_free(*base, over_size, 0); return NULL; } } From 30a17bf1b773e57fa79c1c96667bf5163a024c02 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 11:36:07 -0700 Subject: [PATCH 332/352] fix missing csize assignment in _mi_os_free_ex --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d134feba..dee263bc 100644 --- a/src/os.c +++ b/src/os.c @@ -181,7 +181,7 @@ static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; - if (csize==0) { _mi_os_good_alloc_size(size); } + if (csize==0) { csize = _mi_os_good_alloc_size(size); } size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? (due to alignment) From 21425bc334ff67d0daafbc1d98056a45f9fab594 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 12:20:02 -0700 Subject: [PATCH 333/352] check all os_commit calls and return NULL on failure --- include/mimalloc/internal.h | 6 +++--- src/os.c | 5 ++++- src/page.c | 26 +++++++++++++++++--------- src/segment.c | 4 +++- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e951b576..5ee59252 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -163,13 +163,13 @@ bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero); -bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); -bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); +mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); +mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/src/os.c b/src/os.c index dee263bc..c720c8ad 100644 --- a/src/os.c +++ b/src/os.c @@ -300,7 +300,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { - _mi_os_commit(p, size, NULL); + if (!_mi_os_commit(p, size, NULL)) { + mi_os_prim_free(p, over_size, 0); + return NULL; + } } } else { // mmap can free inside an allocation diff --git a/src/page.c b/src/page.c index 89acb409..b34ee414 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* 
tld); -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -630,14 +630,14 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - if (page->free != NULL) return; + if (page->free != NULL) return true; #endif - if (page->capacity >= page->reserved) return; + if (page->capacity >= page->reserved) return true; size_t page_size; //uint8_t* page_start = @@ -673,6 +673,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); + return true; } // Initialize a fresh page @@ -724,8 +725,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,tld); - mi_assert(mi_page_immediate_available(page)); + if (mi_page_extend_free(heap,page,tld)) { + mi_assert(mi_page_immediate_available(page)); + } + return; } @@ -817,9 +820,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page_candidate != NULL) { page = page_candidate; } - if (page != NULL && !mi_page_immediate_available(page)) { - mi_assert_internal(mi_page_is_expandable(page)); - mi_page_extend_free(heap, page, heap->tld); + if (page != NULL) { + if (!mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + if (!mi_page_extend_free(heap, page, heap->tld)) { + page = NULL; // failed to extend + } + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); } if (page == NULL) { diff --git a/src/segment.c b/src/segment.c index 708ddd00..1813a1fc 100644 --- a/src/segment.c +++ b/src/segment.c @@ -182,7 +182,9 @@ static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) static void mi_segment_protect_range(void* p, size_t size, bool protect) { if (protect) { - _mi_os_protect(p, size); + if (!_mi_os_protect(p, size)) { + _mi_error_message(EFAULT,"unable to protect segment memory at %p\n", p); + } } else { _mi_os_unprotect(p, size); From 6c3d75a355c14bd3e67c67aed76f0297ab24ed6f Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 15:07:15 -0700 Subject: [PATCH 334/352] fix base address if commit fails on aligned overallocation --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index c720c8ad..580b8af0 100644 --- a/src/os.c +++ b/src/os.c @@ -301,7 +301,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { if (!_mi_os_commit(p, size, NULL)) { - mi_os_prim_free(p, over_size, 0); + mi_os_prim_free(*base, over_size, 0); return NULL; } } From 2d34956bedded440b4aff9d8a53570f5c6a8e2be Mon Sep 17 
00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 15:53:28 -0700 Subject: [PATCH 335/352] use main instead of master in readme --- azure-pipelines.yml | 35 ++--------------------------------- readme.md | 5 ++--- 2 files changed, 4 insertions(+), 36 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b7fc59d4..ea915815 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,10 +6,8 @@ trigger: branches: include: - - master - - dev3 - - dev2 - - dev + - main + - dev* tags: include: - v* @@ -184,35 +182,6 @@ jobs: # Other OS versions (just debug mode) # ---------------------------------------------------------- -- job: - displayName: Windows 2019 - pool: - vmImage: - windows-2019 - strategy: - matrix: - Debug: - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON - MSBuildConfiguration: Debug - Release: - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release - MSBuildConfiguration: Release - steps: - - task: CMake@1 - inputs: - workingDirectory: $(BuildType) - cmakeArgs: .. $(cmakeExtraArgs) - - task: MSBuild@1 - inputs: - solution: $(BuildType)/libmimalloc.sln - configuration: '$(MSBuildConfiguration)' - msbuildArguments: -m - - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration) - workingDirectory: $(BuildType) - displayName: CTest - - job: displayName: Ubuntu 24.04 pool: diff --git a/readme.md b/readme.md index cee78898..601a7e24 100644 --- a/readme.md +++ b/readme.md @@ -72,15 +72,14 @@ Enjoy! ### Branches -* `master`: latest stable release (still based on `dev2`). +* `main`: latest stable release (still based on `dev2`). * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. * `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use less memory - with less fragmentation. + This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. 
### Releases From d389819cc9243c2647684544c2942ef6cc893a2a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 17:59:46 -0700 Subject: [PATCH 336/352] add initial support for _mi_prim_reuse and MADV_FREE_REUSABLE on macOS (issue #1097) --- include/mimalloc/internal.h | 1 + include/mimalloc/prim.h | 5 +++++ src/arena.c | 6 +++++- src/os.c | 11 +++++++++++ src/prim/emscripten/prim.c | 5 +++++ src/prim/unix/prim.c | 33 +++++++++++++++++++++++++++------ src/prim/wasi/prim.c | 5 +++++ src/prim/windows/prim.c | 5 +++++ 8 files changed, 64 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5ee59252..3e57e252 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -167,6 +167,7 @@ bool _mi_os_decommit(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +void _mi_os_reuse(void* p, size_t size); mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 3d8f1806..c71678cc 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -63,6 +63,11 @@ int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); // Returns error code or 0 on success. int _mi_prim_reset(void* addr, size_t size); +// Reuse memory. This is called for memory that is already committed but +// may have been reset (`_mi_prim_reset`) or decommitted (`_mi_prim_decommit`) where `needs_recommit` was false. +// Returns error code or 0 on success. On most platforms this is a no-op. +int _mi_prim_reuse(void* addr, size_t size); + // Protect memory. Returns error code or 0 on success. 
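Aside — a minimal standalone sketch (not part of this patch) of the purge/reuse pairing that `_mi_prim_reuse` enables on macOS: a purged range is marked with MADV_FREE_REUSABLE so its rss is accounted for immediately, and should be marked with MADV_FREE_REUSE again before it is reused; on other platforms both steps are effectively no-ops. The helper names below are illustrative only and are not mimalloc API.

#include <string.h>
#include <sys/mman.h>

static int os_purge_hint(void* p, size_t size) {     // illustrative helper
#if defined(__APPLE__) && defined(MADV_FREE_REUSABLE)
  return madvise(p, size, MADV_FREE_REUSABLE);        // immediate rss accounting on macOS
#else
  return madvise(p, size, MADV_DONTNEED);             // generic purge elsewhere
#endif
}

static int os_reuse_hint(void* p, size_t size) {      // illustrative helper
#if defined(__APPLE__) && defined(MADV_FREE_REUSE)
  return madvise(p, size, MADV_FREE_REUSE);           // pair the earlier MADV_FREE_REUSABLE
#else
  (void)p; (void)size; return 0;                      // no-op on most platforms
#endif
}

int main(void) {
  const size_t size = 1 << 20;
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
  if (p == MAP_FAILED) return 1;
  memset(p, 1, size);        // use the memory
  os_purge_hint(p, size);    // done for now: let the OS reclaim the physical pages
  os_reuse_hint(p, size);    // about to touch it again: restore the accounting first
  memset(p, 2, size);        // safe to use again (contents may have been discarded meanwhile)
  return munmap(p, size);
}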
int _mi_prim_protect(void* addr, size_t size, bool protect); diff --git a/src/arena.c b/src/arena.c index aa01ffcb..25cef886 100644 --- a/src/arena.c +++ b/src/arena.c @@ -266,12 +266,12 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar else if (commit) { // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; + const size_t commit_size = mi_arena_block_size(needed_bcount); bool any_uncommitted; size_t already_committed = 0; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed); if (any_uncommitted) { mi_assert_internal(already_committed < needed_bcount); - const size_t commit_size = mi_arena_block_size(needed_bcount); const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed); bool commit_zero = false; if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) { @@ -281,6 +281,10 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar if (commit_zero) { memid->initially_zero = true; } } } + else { + // all are already committed: signal that we are reusing memory in case it was purged before + _mi_os_reuse( p, commit_size ); + } } else { // no need to commit, but check if already fully committed diff --git a/src/os.c b/src/os.c index 580b8af0..4c99d625 100644 --- a/src/os.c +++ b/src/os.c @@ -512,6 +512,17 @@ bool _mi_os_reset(void* addr, size_t size) { } +void _mi_os_reuse( void* addr, size_t size ) { + // page align conservatively within the range + size_t csize = 0; + void* const start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return; + const int err = _mi_prim_reuse(start, csize); + if (err != 0) { + _mi_warning_message("cannot reuse OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } +} + // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. 
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size) diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index a8677cbc..c4cfc35d 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -114,6 +114,11 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index a90fa659..9ac855a5 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -433,13 +433,27 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { return err; } +int _mi_prim_reuse(void* start, size_t size) { + #if defined(__APPLE__) && defined(MADV_FREE_REUSE) + return unix_madvise(start, size, MADV_FREE_REUSE); + #endif + return 0; +} + int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - err = unix_madvise(start, size, MADV_DONTNEED); #if !MI_DEBUG && MI_SECURE<=2 *needs_recommit = false; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + #else + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #endif #else + // note: don't use MADV_FREE_REUSABLE as the range may contain protected areas + err = unix_madvise(start, size, MADV_DONTNEED); *needs_recommit = true; mprotect(start, size, PROT_NONE); #endif @@ -454,14 +468,21 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { } int _mi_prim_reset(void* start, size_t size) { - // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + int err = 0; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + // on macOS we try to use MADV_FREE_REUSABLE as it seems the fastest + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + if (err == 0) return 0; + // fall through + #endif + + #if defined(MADV_FREE) + // Otherwise, we try to use `MADV_FREE` as that is the fastest. A drawback though is that it // will not reduce the `rss` stats in tools like `top` even though the memory is available // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by // default `MADV_DONTNEED` is used though. 
- #if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on @@ -469,7 +490,7 @@ int _mi_prim_reset(void* start, size_t size) { err = unix_madvise(start, size, MADV_DONTNEED); } #else - int err = unix_madvise(start, size, MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); #endif return err; } diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index e1e7de5e..745a41fd 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -149,6 +149,11 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index b82918c1..df941af9 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -352,6 +352,11 @@ int _mi_prim_reset(void* addr, size_t size) { return (p != NULL ? 0 : (int)GetLastError()); } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { DWORD oldprotect = 0; BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); From 2696627aafef1afc52ead72fe27c3a2a1347b27a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:07:37 -0700 Subject: [PATCH 337/352] add MI_UNUSED for unix _mi_prim_reuse --- src/prim/unix/prim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 9ac855a5..8452b8c2 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -434,6 +434,7 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { } int _mi_prim_reuse(void* start, size_t size) { + MI_UNUSED(start); MI_UNUSED(size); #if defined(__APPLE__) && defined(MADV_FREE_REUSE) return unix_madvise(start, size, MADV_FREE_REUSE); #endif From d7431402c5ef192a5d9c277abdc2fb4640abc4c1 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:15:16 -0700 Subject: [PATCH 338/352] fall back to MADV_DONTNEED if MADV_FREE_REUSABLE fails on macOS; disable use of MADV_FREE_REUSE on a reset (issue #1097) --- src/prim/unix/prim.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 8452b8c2..780d254f 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -443,18 +443,17 @@ int _mi_prim_reuse(void* start, size_t size) { int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; - #if !MI_DEBUG && MI_SECURE<=2 - *needs_recommit = false; - #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) err = unix_madvise(start, size, MADV_FREE_REUSABLE); - #else + if (err) { err = unix_madvise(start, size, MADV_DONTNEED); } + #else // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = unix_madvise(start, size, MADV_DONTNEED); - #endif + #endif + #if !MI_DEBUG && MI_SECURE<=2 + *needs_recommit = false; 
#else - // note: don't use MADV_FREE_REUSABLE as the range may contain protected areas - err = unix_madvise(start, size, MADV_DONTNEED); *needs_recommit = true; mprotect(start, size, PROT_NONE); #endif @@ -470,10 +469,11 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int _mi_prim_reset(void* start, size_t size) { int err = 0; - #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) - // on macOS we try to use MADV_FREE_REUSABLE as it seems the fastest + + // on macOS can use MADV_FREE_REUSABLE (but we disable this for now as it seems slower) + #if 0 && defined(__APPLE__) && defined(MADV_FREE_REUSABLE) err = unix_madvise(start, size, MADV_FREE_REUSABLE); - if (err == 0) return 0; + if (err==0) return 0; // fall through #endif From 2f0540c4f9e57c55cca4e0d621dd8b3c74843ef0 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:50:50 -0700 Subject: [PATCH 339/352] add _mi_os_zalloc --- include/mimalloc/internal.h | 1 + include/mimalloc/prim.h | 2 +- src/arena.c | 7 +----- src/init.c | 28 +++++++++-------------- src/os.c | 45 ++++++++++++++++++++++++++++++------- src/segment-map.c | 2 +- 6 files changed, 51 insertions(+), 34 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 3e57e252..7250d31a 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -154,6 +154,7 @@ void _mi_heap_guarded_init(mi_heap_t* heap); // os.c void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_memid_t* memid); +void* _mi_os_zalloc(size_t size, mi_memid_t* memid); void _mi_os_free(void* p, size_t size, mi_memid_t memid); void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index c71678cc..1087d9b8 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -59,7 +59,7 @@ int _mi_prim_commit(void* addr, size_t size, bool* is_zero); // pre: needs_recommit != NULL int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); -// Reset memory. The range keeps being accessible but the content might be reset. +// Reset memory. The range keeps being accessible but the content might be reset to zero at any moment. // Returns error code or 0 on success. 
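Aside — the idea behind the new `_mi_os_zalloc` can be sketched in isolation (hypothetical names, not mimalloc's internal API): the raw allocation reports whether its memory is already known to be zero (as freshly mapped OS pages are), and the zeroing wrapper clears it only when that is not the case, avoiding a redundant memset.

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

// Illustrative raw allocator: returns memory plus a flag telling the caller
// whether the contents are already zero-initialized.
static void* raw_alloc(size_t size, bool* is_zero) {
  *is_zero = false;                        // e.g. recycled memory: contents unspecified
  return malloc(size);
}

static void* raw_zalloc(size_t size) {
  bool is_zero = false;
  void* p = raw_alloc(size, &is_zero);
  if (p == NULL) return NULL;
  if (!is_zero) { memset(p, 0, size); }    // zero only on demand
  return p;
}

int main(void) {
  int* a = (int*)raw_zalloc(16 * sizeof(int));
  if (a == NULL) return 1;
  int ok = (a[0] == 0 && a[15] == 0);
  free(a);
  return ok ? 0 : 1;
}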
int _mi_prim_reset(void* addr, size_t size); diff --git a/src/arena.c b/src/arena.c index 25cef886..ba36c415 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,14 +188,9 @@ void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid) { if (p != NULL) return p; // or fall back to the OS - p = _mi_os_alloc(size, memid); + p = _mi_os_zalloc(size, memid); if (p == NULL) return NULL; - // zero the OS memory if needed - if (!memid->initially_zero) { - _mi_memzero_aligned(p, size); - memid->initially_zero = true; - } return p; } diff --git a/src/init.c b/src/init.c index ff6c5d29..05ff688e 100644 --- a/src/init.c +++ b/src/init.c @@ -298,7 +298,6 @@ static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; static mi_thread_data_t* mi_thread_data_zalloc(void) { // try to find thread metadata in the cache - bool is_zero = false; mi_thread_data_t* td = NULL; for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); @@ -306,32 +305,25 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { // found cached allocation, try use it td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - break; + _mi_memzero(td, offsetof(mi_thread_data_t,memid)); + return td; } } } // if that fails, allocate as meta data + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid); if (td == NULL) { - mi_memid_t memid; - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid); + // if this fails, try once more. (issue #257) + td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid); if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid); - if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); - } - } - if (td != NULL) { - td->memid = memid; - is_zero = memid.initially_zero; + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + return NULL; } } - - if (td != NULL && !is_zero) { - _mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid)); - } + td->memid = memid; return td; } diff --git a/src/os.c b/src/os.c index 4c99d625..3c25ff59 100644 --- a/src/os.c +++ b/src/os.c @@ -182,6 +182,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; if (csize==0) { csize = _mi_os_good_alloc_size(size); } + mi_assert_internal(csize >= size); size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? 
(due to alignment) @@ -341,9 +342,11 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) { bool os_is_large = false; bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); - if (p != NULL) { - *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); - } + if (p == NULL) return NULL; + + *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); + mi_assert_internal(memid->mem.os.size >= size); + mi_assert_internal(memid->initially_committed); return p; } @@ -359,14 +362,40 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo bool os_is_zero = false; void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base ); - if (p != NULL) { - *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); - memid->mem.os.base = os_base; - memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? - } + if (p == NULL) return NULL; + + *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? + + mi_assert_internal(memid->mem.os.size >= size); + mi_assert_internal(_mi_is_aligned(p,alignment)); + mi_assert_internal(!commit || memid->initially_committed); + mi_assert_internal(!memid->initially_zero || memid->initially_committed); return p; } + +mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) { + if (p==NULL || size==0 || memid->initially_zero) return p; + if (!memid->initially_committed) { + bool is_zero = false; + if (!_mi_os_commit(p, size, &is_zero)) { + _mi_os_free(p, size, *memid); + return NULL; + } + memid->initially_committed = true; + } + _mi_memzero_aligned(p,size); + memid->initially_zero = true; + return p; +} + +void* _mi_os_zalloc(size_t size, mi_memid_t* memid) { + void* p = _mi_os_alloc(size,memid); + return mi_os_ensure_zero(p, size, memid); +} + /* ----------------------------------------------------------- OS aligned allocation with an offset. This is used for large alignments > MI_BLOCK_ALIGNMENT_MAX. 
We use a large mimalloc diff --git a/src/segment-map.c b/src/segment-map.c index 2f68f8c4..bbcea28a 100644 --- a/src/segment-map.c +++ b/src/segment-map.c @@ -61,7 +61,7 @@ static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bo if mi_unlikely(part == NULL) { if (!create_on_demand) return NULL; mi_memid_t memid; - part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid); + part = (mi_segmap_part_t*)_mi_os_zalloc(sizeof(mi_segmap_part_t), &memid); if (part == NULL) return NULL; part->memid = memid; mi_segmap_part_t* expected = NULL; From 57830a4b254673de60900ab83031b5b8454d947a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 21:09:32 -0700 Subject: [PATCH 340/352] fix assertion in mi_os_ensure_zero --- src/os.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index 3c25ff59..028a5ee5 100644 --- a/src/os.c +++ b/src/os.c @@ -370,14 +370,15 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo mi_assert_internal(memid->mem.os.size >= size); mi_assert_internal(_mi_is_aligned(p,alignment)); - mi_assert_internal(!commit || memid->initially_committed); - mi_assert_internal(!memid->initially_zero || memid->initially_committed); + if (commit) { mi_assert_internal(memid->initially_committed); } + if (memid->initially_zero) { mi_assert_internal(memid->initially_committed); } return p; } mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) { - if (p==NULL || size==0 || memid->initially_zero) return p; + if (p==NULL || size==0) return p; + // ensure committed if (!memid->initially_committed) { bool is_zero = false; if (!_mi_os_commit(p, size, &is_zero)) { @@ -386,6 +387,8 @@ mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_ } memid->initially_committed = true; } + // ensure zero'd + if (memid->initially_zero) return p; _mi_memzero_aligned(p,size); memid->initially_zero = true; return p; From d7d6c3b5c3ffe106077625bc21741b522c603f03 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 21:22:44 -0700 Subject: [PATCH 341/352] fix assertion --- src/os.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index 028a5ee5..279b60fb 100644 --- a/src/os.c +++ b/src/os.c @@ -370,8 +370,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo mi_assert_internal(memid->mem.os.size >= size); mi_assert_internal(_mi_is_aligned(p,alignment)); - if (commit) { mi_assert_internal(memid->initially_committed); } - if (memid->initially_zero) { mi_assert_internal(memid->initially_committed); } + if (commit) { mi_assert_internal(memid->initially_committed); } return p; } From e7cbbbfb1417c8c4cc0855f081d1c29a64023ab5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 7 Jun 2025 09:51:35 -0700 Subject: [PATCH 342/352] add mi_process_done to the api --- include/mimalloc.h | 14 +++++++++----- src/init.c | 6 +++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 97cf7856..ce814d18 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -153,17 +153,21 @@ mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; -mi_decl_export void 
mi_options_print(void) mi_attr_noexcept; - -mi_decl_export void mi_process_init(void) mi_attr_noexcept; -mi_decl_export void mi_thread_init(void) mi_attr_noexcept; -mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_options_print(void) mi_attr_noexcept; mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; + +// Generally do not use the following as these are usually called automatically +mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_cdecl mi_process_done(void) mi_attr_noexcept; +mi_decl_export void mi_thread_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_done(void) mi_attr_noexcept; + + // ------------------------------------------------------------------------------------- // Aligned allocation // Note that `alignment` always follows `size` for consistency with unaligned diff --git a/src/init.c b/src/init.c index 05ff688e..eb87ab3c 100644 --- a/src/init.c +++ b/src/init.c @@ -642,7 +642,11 @@ void mi_process_init(void) mi_attr_noexcept { } } -// Called when the process is done (through `at_exit`) +void mi_cdecl mi_process_done(void) mi_attr_noexcept { + _mi_process_done(); +} + +// Called when the process is done (cdecl as it is used with `at_exit` on some platforms) void mi_cdecl _mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; From c1249a4b1502a35d68be51aceddd466a301f5a25 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 7 Jun 2025 10:12:53 -0700 Subject: [PATCH 343/352] do not automatically call mi_process_done if mi_option_destroy_on_exit > 1 --- include/mimalloc/internal.h | 6 +++--- src/init.c | 14 +++++++------- src/prim/prim.c | 12 ++++++------ src/prim/windows/prim.c | 18 +++++++++--------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 7250d31a..a29a419a 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -63,7 +63,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_align(a) -#define mi_decl_noreturn +#define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden #define mi_decl_cold @@ -135,8 +135,8 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; -void _mi_process_load(void); -void mi_cdecl _mi_process_done(void); +void _mi_auto_process_init(void); +void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept; bool _mi_is_redirected(void); bool _mi_allocator_init(const char** message); void _mi_allocator_done(void); diff --git a/src/init.c b/src/init.c index eb87ab3c..0a71ce05 100644 --- a/src/init.c +++ b/src/init.c @@ -323,7 +323,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { return NULL; } } - td->memid = memid; + td->memid = memid; return td; } @@ -555,7 +555,7 @@ mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { } // Called once by the process loader from `src/prim/prim.c` -void _mi_process_load(void) { +void _mi_auto_process_init(void) { mi_heap_main_init(); #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; @@ -642,12 +642,8 @@ void mi_process_init(void) mi_attr_noexcept { } } -void mi_cdecl mi_process_done(void) mi_attr_noexcept { - _mi_process_done(); -} - // Called when the process is done (cdecl as it is used with `at_exit` on some platforms) -void mi_cdecl _mi_process_done(void) { +void mi_cdecl mi_process_done(void) mi_attr_noexcept { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once @@ -690,3 +686,7 @@ void mi_cdecl _mi_process_done(void) { os_preloading = true; // don't call the C runtime anymore } +void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept { + if (_mi_option_get_fast(mi_option_destroy_on_exit)>1) return; + mi_process_done(); +} diff --git a/src/prim/prim.c b/src/prim/prim.c index 2002853f..5147bae8 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -39,29 +39,29 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_destructor __attribute__((destructor)) #endif static void mi_attr_constructor mi_process_attach(void) { - _mi_process_load(); + _mi_auto_process_init(); } static void mi_attr_destructor mi_process_detach(void) { - _mi_process_done(); + _mi_auto_process_done(); } #elif defined(__cplusplus) // C++: use static initialization to detect process start/end // This is not guaranteed to be first/last but the best we can generally do? 
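Aside — a small usage sketch of the now-public process lifecycle API. These calls normally happen automatically through the hooks shown here, and this patch additionally skips the automatic mi_process_done when mi_option_destroy_on_exit is set above 1, so an embedder can drive shutdown explicitly:

#include <mimalloc.h>

int main(void) {
  mi_process_init();     // normally invoked automatically when the library loads
  void* p = mi_malloc(64);
  mi_free(p);
  mi_process_done();     // normally invoked automatically at exit; call explicitly
                         // only when the automatic hook is suppressed or unavailable
  return 0;
}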
struct mi_init_done_t { mi_init_done_t() { - _mi_process_load(); + _mi_auto_process_init(); } ~mi_init_done_t() { - _mi_process_done(); + _mi_auto_process_done(); } }; static mi_init_done_t mi_init_done; #else - #pragma message("define a way to call _mi_process_load/done on your platform") + #pragma message("define a way to call _mi_auto_process_init/done on your platform") #endif #endif -// Generic allocator init/done callback +// Generic allocator init/done callback #ifndef MI_PRIM_HAS_ALLOCATOR_INIT bool _mi_is_redirected(void) { return false; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index df941af9..6752569c 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -633,7 +633,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { //---------------------------------------------------------------- #if MI_WIN_USE_FIXED_TLS==1 -mi_decl_cache_align size_t _mi_win_tls_offset = 0; +mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif //static void mi_debug_out(const char* s) { @@ -654,14 +654,14 @@ static void mi_win_tls_init(DWORD reason) { #endif #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if (mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); - #endif + #endif } - #endif - } + #endif + } } static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { @@ -669,10 +669,10 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(module); mi_win_tls_init(reason); if (reason==DLL_PROCESS_ATTACH) { - _mi_process_load(); + _mi_auto_process_init(); } else if (reason==DLL_PROCESS_DETACH) { - _mi_process_done(); + _mi_auto_process_done(); } else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) { _mi_thread_done(NULL); @@ -684,7 +684,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #define MI_PRIM_HAS_PROCESS_ATTACH 1 // Windows DLL: easy to hook into process_init and thread_done - BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { mi_win_main((PVOID)inst,reason,reserved); return TRUE; } @@ -762,7 +762,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { static int mi_process_attach(void) { mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL); - atexit(&_mi_process_done); + atexit(&_mi_auto_process_done); return 0; } typedef int(*mi_crt_callback_t)(void); From 82cd90083494284691326e13362db61d8d21672f Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 15:54:01 -0700 Subject: [PATCH 344/352] make macOS interposes compile for older macOS versions (by @noxybot, PR #1028) --- src/alloc-override.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index b5109ded..52ab69c5 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -71,24 +71,20 @@ typedef void* mi_nothrow_t; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) - __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = + #define MI_INTERPOSE_DECLS(name) 
__attribute__((used)) static struct mi_interpose_s name[] __attribute__((section("__DATA, __interpose"))) + + MI_INTERPOSE_DECLS(_mi_interposes) = { MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), MI_INTERPOSE_MI(strdup), - #if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7 - MI_INTERPOSE_MI(strndup), - #endif MI_INTERPOSE_MI(realpath), MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_MI(malloc_good_size), - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - MI_INTERPOSE_MI(aligned_alloc), - #endif #ifdef MI_OSX_ZONE // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely MI_INTERPOSE_MI(free), @@ -99,6 +95,12 @@ typedef void* mi_nothrow_t; MI_INTERPOSE_FUN(vfree,mi_cfree), #endif }; + MI_INTERPOSE_DECLS(_mi_interposes_10_7) __OSX_AVAILABLE(10.7) = { + MI_INTERPOSE_MI(strndup), + }; + MI_INTERPOSE_DECLS(_mi_interposes_10_15) __OSX_AVAILABLE(10.15) = { + MI_INTERPOSE_MI(aligned_alloc), + }; #ifdef __cplusplus extern "C" { From a981d40787251ae52c720df4b602df687da93fd2 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 16:02:44 -0700 Subject: [PATCH 345/352] fix link error without static library build (by @fd00, PR #1082) --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3acf83e..353127d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,10 +713,10 @@ if (MI_BUILD_TESTS) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) - if(MI_BUILD_SHARED AND (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) - target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) - else() + if(MI_BUILD_STATIC) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) + else() + target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) endif() add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) endforeach() From 99ed3ea754c31e383fe88da467eb861aff9f7146 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 8 Jun 2025 16:41:04 -0700 Subject: [PATCH 346/352] enable building for xbox, based on pr #1084 by @maxbachmann --- src/prim/windows/prim.c | 48 ++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 6752569c..eebdc4a6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -12,6 +12,10 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc/prim.h" #include // fputs, stderr +// xbox has no console IO +#if !defined(WINAPI_FAMILY_PARTITION) || WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM) +#define MI_HAS_CONSOLE_IO +#endif //--------------------------------------------- // Dynamically bind Windows API points for portability @@ -45,22 +49,30 @@ typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { #define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 #include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef LONG (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); // avoid NTSTATUS as it is not defined on xbox (pr #1084) static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; -// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 +// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 (and GetNumaNodeProcessorMask is not supported on xbox) typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMask)(UCHAR Node, PULONGLONG ProcessorMask); +typedef BOOL (__stdcall* PGetNumaHighestNodeNumber)(PULONG Node); static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; +static PGetNumaNodeProcessorMask pGetNumaNodeProcessorMask = NULL; +static PGetNumaHighestNodeNumber pGetNumaHighestNodeNumber = NULL; + +// Not available on xbox +typedef SIZE_T(__stdcall* PGetLargePageMinimum)(VOID); +static PGetLargePageMinimum pGetLargePageMinimum = NULL; // Available after Windows XP typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes ); @@ -74,6 +86,7 @@ static bool win_enable_large_os_pages(size_t* large_page_size) static bool large_initialized = false; if (large_initialized) return (_mi_os_large_page_size() > 0); large_initialized = true; + if (pGetLargePageMinimum==NULL) return false; // no large page support (xbox etc.) 
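Aside — the dynamic-binding pattern this patch applies throughout can be shown on its own: optional Win32 entry points such as GetLargePageMinimum are resolved at runtime with GetProcAddress, and the code falls back gracefully when the platform (e.g. Xbox) does not export them. A minimal sketch, assuming a Windows build environment:

#include <windows.h>
#include <stdio.h>

// Bind GetLargePageMinimum dynamically instead of linking to it directly,
// since not every Windows-like target exports it.
typedef SIZE_T (__stdcall *PGetLargePageMinimum_t)(VOID);

int main(void) {
  PGetLargePageMinimum_t pGetLargePageMinimum = NULL;
  HMODULE hDll = GetModuleHandleA("kernel32.dll");
  if (hDll != NULL) {
    pGetLargePageMinimum = (PGetLargePageMinimum_t)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum");
  }
  if (pGetLargePageMinimum == NULL) {
    printf("large OS pages are not supported on this platform\n");
    return 0;
  }
  printf("minimum large page size: %zu bytes\n", (size_t)(*pGetLargePageMinimum)());
  return 0;
}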
// Try to see if large OS pages are supported // To use large pages on Windows, we first need access permission @@ -92,8 +105,8 @@ static bool win_enable_large_os_pages(size_t* large_page_size) if (ok) { err = GetLastError(); ok = (err == ERROR_SUCCESS); - if (ok && large_page_size != NULL) { - *large_page_size = GetLargePageMinimum(); + if (ok && large_page_size != NULL && pGetLargePageMinimum != NULL) { + *large_page_size = (*pGetLargePageMinimum)(); } } } @@ -149,6 +162,9 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); + pGetNumaNodeProcessorMask = (PGetNumaNodeProcessorMask)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMask"); + pGetNumaHighestNodeNumber = (PGetNumaHighestNodeNumber)(void (*)(void))GetProcAddress(hDll, "GetNumaHighestNodeNumber"); + pGetLargePageMinimum = (PGetLargePageMinimum)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum"); // Get physical memory (not available on XP, so check dynamically) PGetPhysicallyInstalledSystemMemory pGetPhysicallyInstalledSystemMemory = (PGetPhysicallyInstalledSystemMemory)(void (*)(void))GetProcAddress(hDll,"GetPhysicallyInstalledSystemMemory"); if (pGetPhysicallyInstalledSystemMemory != NULL) { @@ -388,7 +404,7 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num } SIZE_T psize = size; void* base = hint_addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + LONG err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); if (err == 0 && base != NULL) { return base; } @@ -442,9 +458,11 @@ size_t _mi_prim_numa_node(void) { size_t _mi_prim_numa_node_count(void) { ULONG numa_max = 0; - GetNumaHighestNodeNumber(&numa_max); + if (pGetNumaHighestNodeNumber!=NULL) { + (*pGetNumaHighestNodeNumber)(&numa_max); + } // find the highest node number that has actual processors assigned to it. Issue #282 - while(numa_max > 0) { + while (numa_max > 0) { if (pGetNumaNodeProcessorMaskEx != NULL) { // Extended API is supported GROUP_AFFINITY affinity; @@ -455,8 +473,10 @@ size_t _mi_prim_numa_node_count(void) { else { // Vista or earlier, use older API that is limited to 64 processors. 
ULONGLONG mask; - if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node + if (pGetNumaNodeProcessorMask != NULL) { + if ((*pGetNumaNodeProcessorMask)((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + } }; } // max node was invalid or had no processor assigned, try again @@ -546,17 +566,21 @@ void _mi_prim_out_stderr( const char* msg ) if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; - static bool hconIsConsole; + static bool hconIsConsole = false; if (hcon == INVALID_HANDLE_VALUE) { - CONSOLE_SCREEN_BUFFER_INFO sbi; hcon = GetStdHandle(STD_ERROR_HANDLE); + #ifdef MI_HAS_CONSOLE_IO + CONSOLE_SCREEN_BUFFER_INFO sbi; hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); + #endif } const size_t len = _mi_strlen(msg); if (len > 0 && len < UINT32_MAX) { DWORD written = 0; if (hconIsConsole) { + #ifdef MI_HAS_CONSOLE_IO WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + #endif } else if (hcon != INVALID_HANDLE_VALUE) { // use direct write if stderr was redirected From 3b2daccf9d1d50ba3a58375f6e9ff0733d1a0c9a Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 16:50:29 -0700 Subject: [PATCH 347/352] fix build for TSAN tests --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 353127d5..c58e64f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,10 +713,12 @@ if (MI_BUILD_TESTS) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) - if(MI_BUILD_STATIC) + if(MI_BUILD_STATIC AND NOT MI_DEBUG_TSAN) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) - else() + elseif(MI_BUILD_SHARED) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) + else() + message(STATUS "cannot build TSAN tests without MI_BUILD_SHARED being enabled") endif() add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) endforeach() From 316a434d8e282656bcad243b6d794fc7feb07038 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:05:30 -0700 Subject: [PATCH 348/352] fix armv7 detection --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c58e64f8..5ce084f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,7 +126,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENE set(MI_ARCH "x64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) set(MI_ARCH "arm64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567].?|ARM)$") set(MI_ARCH "arm32") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") if(CMAKE_SIZEOF_VOID_P==4) From 82b67862c8763040ee90a724e78bf4738e50eb34 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:18:51 -0700 Subject: [PATCH 349/352] update vcpkg config to 1.9.4 --- contrib/vcpkg/portfile.cmake | 5 +++-- contrib/vcpkg/vcpkg.json | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake 
b/contrib/vcpkg/portfile.cmake index 69661526..b59c3675 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -4,8 +4,8 @@ vcpkg_from_github( HEAD_REF master # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). - # REF "v${VERSION}" - REF 866ce5b89db1dbc3e66bbf89041291fd16329518 + REF "v${VERSION}" + # REF 866ce5b89db1dbc3e66bbf89041291fd16329518 # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) @@ -19,6 +19,7 @@ vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS secure MI_SECURE override MI_OVERRIDE optarch MI_OPT_ARCH + nooptarch MI_NO_OPT_ARCH optsimd MI_OPT_SIMD xmalloc MI_XMALLOC asm MI_SEE_ASM diff --git a/contrib/vcpkg/vcpkg.json b/contrib/vcpkg/vcpkg.json index 45f8097b..b38555a1 100644 --- a/contrib/vcpkg/vcpkg.json +++ b/contrib/vcpkg/vcpkg.json @@ -1,6 +1,6 @@ { "name": "mimalloc", - "version": "1.9.2", + "version": "1.9.4", "port-version": 2, "description": "Compact general purpose allocator with excellent performance", "homepage": "https://github.com/microsoft/mimalloc", @@ -35,6 +35,9 @@ "optarch": { "description": "Use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" }, + "nooptarch": { + "description": "Do _not_ use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" + }, "optsimd": { "description": "Allow use of SIMD instructions (avx2 or neon) (requires 'optarch' to be enabled)" }, From 6d3c8607699da8375ec9985aaacdb1fa8ea6ea4d Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:19:18 -0700 Subject: [PATCH 350/352] update readme for upcoming release --- readme.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/readme.md b/readme.md index 601a7e24..71aaf7a2 100644 --- a/readme.md +++ b/readme.md @@ -12,9 +12,9 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release : `v3.0.3` (beta) (2025-03-28). -Latest v2 release: `v2.2.3` (2025-03-28). -Latest v1 release: `v1.9.3` (2024-03-28). +Latest release : `v3.1.4` (beta) (2025-06-09). +Latest v2 release: `v2.2.4` (2025-06-09). +Latest v1 release: `v1.9.4` (2024-06-09). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -77,12 +77,16 @@ Enjoy! * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. -* `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version - simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. +* `dev3`: development branch for mimalloc v3 beta. This branch is downstream of `dev`. This version + simplifies the lock-free ownership of previous versions, and improves sharing of memory between + threads. On certain large workloads this version may use (much) less memory. 
### Releases +* 2025-06-09, `v1.9.4`, `v2.2.4`, `v3.1.4` (beta) : Some important bug fixes, including a case where OS memory + was not always fully released. Improved v3 performance, build on XBox, fix build on Android, support interpose + for older macOS versions, use MADV_FREE_REUSABLE on macOS, always check commit success, better support for Windows + fixed TLS offset, etc. * 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. From cfff6bfd47bd491145364b210d6552ea2c42444d Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 20:01:28 -0700 Subject: [PATCH 351/352] bump version to v1.9.5 for further development --- cmake/mimalloc-config-version.cmake | 2 +- contrib/vcpkg/portfile.cmake | 2 +- include/mimalloc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 0446485b..1057b5c0 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 9) -set(mi_version_patch 4) +set(mi_version_patch 5) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index b59c3675..a13b57c3 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -9,7 +9,7 @@ vcpkg_from_github( # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) - SHA512 0b0e5ff823c49b9534b8c32800679806c5d7c29020af058da043c3e6e36ae3c32a1cdd5a21ece97dd60bc7dd4703967f683beac435dbb8514638a6cc55e5dea8 + SHA512 fb5aa8c2e6c15e5d22746ee40ed196f2fb7eafec9abfcbf94e7e70854734e99dd09886e1d68374fb995fe597e158100aa89260579e34cf5d9fb75d501b186d6a ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS diff --git a/include/mimalloc.h b/include/mimalloc.h index ce814d18..7bde743f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 194 // major + 2 digits minor +#define MI_MALLOC_VERSION 195 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From d21e2b8dd350356e933e08df1541dc45ea2f8719 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 13 Jun 2025 22:18:51 -0700 Subject: [PATCH 352/352] Update readme.md for v3.1.5 --- readme.md | 89 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/readme.md b/readme.md index 71aaf7a2..ddf358b2 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,7 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release : `v3.1.4` (beta) (2025-06-09). +Latest release : `v3.1.5` (beta) (2025-06-13). Latest v2 release: `v2.2.4` (2025-06-09). Latest v1 release: `v1.9.4` (2024-06-09). @@ -83,6 +83,7 @@ Enjoy! ### Releases +* 2025-06-13, `v3.1.5`: Bug fix release where memory was not always correctly committed (issue #1098). 
* 2025-06-09, `v1.9.4`, `v2.2.4`, `v3.1.4` (beta) : Some important bug fixes, including a case where OS memory was not always fully released. Improved v3 performance, build on XBox, fix build on Android, support interpose for older macOS versions, use MADV_FREE_REUSABLE on macOS, always check commit success, better support for Windows @@ -103,53 +104,13 @@ Enjoy! add 0-byte to canary; upstream CPython fixes; reduce .bss size; allow fixed TLS slot on Windows for improved performance. * 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches. -* 2024-05-13, `v1.8.6`, `v2.1.6`: Fix build errors on various (older) platforms. Refactored aligned allocation. -* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. - Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size - directly available (and new `block_size_shift` to improve aligned block free-ing). - New approach to collection of abandoned segments: When - a thread terminates the segments it owns are abandoned (containing still live objects) and these can be - reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arena's - which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in - an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim` - gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%). - -* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity - by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory - usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. - -* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. - -* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision - with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS - abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. - -* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. - Support arbitrary large alignments (in particular for `std::pmr` pools). - Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). - Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). - Various small bug fixes. - -* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow - detection. Initial - support for attaching heaps to a specific memory area (only in v2). 
Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . - -* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation - even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix - warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object - allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes. - -* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on - Windows 11, fix compilation with musl, potentially reduced - committed memory, add `bin/minject` for Windows, - improved wasm support, faster aligned allocation, - various small fixes. * [Older release notes](#older-release-notes) Special thanks to: -* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his many contributions, and making +* Sergiy Kuryata for his contributions on reducing memory commit -- especially on Windows with the Windows thread pool (now implemented in v3). +* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his _many_ contributions, and making mimalloc work better on many less common operating systems, like Haiku, Dragonfly, etc. * Mary Feofanova (@mary3000), Evgeniy Moiseenko, and Manuel Pöter (@mpoeter) for making mimalloc TSAN checkable, and finding memory model bugs using the [genMC] model checker. @@ -904,6 +865,48 @@ provided by the bot. You will only need to do this once across all repos using o # Older Release Notes +* 2024-05-13, `v1.8.6`, `v2.1.6`: Fix build errors on various (older) platforms. Refactored aligned allocation. +* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. + Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size + directly available (and new `block_size_shift` to improve aligned block free-ing). + New approach to collection of abandoned segments: When + a thread terminates the segments it owns are abandoned (containing still live objects) and these can be + reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arena's + which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in + an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim` + gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%). + +* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity + by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory + usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. + +* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. + +* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision + with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). 
Created an OS + abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. + +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. + Support arbitrary large alignments (in particular for `std::pmr` pools). + Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). + Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). + Various small bug fixes. + +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow + detection. Initial + support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . + +* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation + even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix + warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object + allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes. + +* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on + Windows 11, fix compilation with musl, potentially reduced + committed memory, add `bin/minject` for Windows, + improved wasm support, faster aligned allocation, + various small fixes. + * 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including M1), improved performance for v2 for large objects, Python integration improvements, more standard installation directories, various small fixes.