From e96614961f1a6b2a3ab01e4b21fc600c23d0ba1b Mon Sep 17 00:00:00 2001
From: Daan
Date: Sat, 6 Nov 2021 14:19:14 -0700
Subject: [PATCH 1/7] fix printf format type mismatches (issue #486)

---
 src/os.c    | 6 +++---
 src/stats.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/os.c b/src/os.c
index 507d9971..26b8bcd2 100644
--- a/src/os.c
+++ b/src/os.c
@@ -846,7 +846,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
   if (err != 0) { err = errno; }
   #endif
   if (err != 0) {
-    _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
+    _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
     mi_mprotect_hint(err);
   }
   mi_assert_internal(err == 0);
@@ -916,7 +916,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   int err = madvise(start, csize, MADV_DONTNEED);
   #endif
   if (err != 0) {
-    _mi_warning_message("madvise reset error: start: %p, csize: 0x%x, errno: %i\n", start, csize, errno);
+    _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, errno);
   }
   //mi_assert(err == 0);
   if (err != 0) return false;
@@ -975,7 +975,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   if (err != 0) { err = errno; }
   #endif
   if (err != 0) {
-    _mi_warning_message("mprotect error: start: %p, csize: 0x%x, err: %i\n", start, csize, err);
+    _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err);
     mi_mprotect_hint(err);
   }
   return (err == 0);
diff --git a/src/stats.c b/src/stats.c
index 115d938e..6d486f42 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -327,7 +327,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
   mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
   mi_stat_print(&stats->threads, "threads", -1, out, arg);
   mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
-  _mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count());
+  _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count());

   mi_msecs_t elapsed;
   mi_msecs_t user_time;
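Note on the patch above: every changed call printed a `size_t` value (`csize`, or the `size_t` returned by `_mi_os_numa_node_count`) with a conversion that expects an `int`; on LP64 platforms `size_t` is 64-bit while `int` is 32-bit, so the argument is read with the wrong width. The `z` length modifier sizes the conversion for `size_t`. A minimal standalone illustration (not part of the patch):

    #include <stdio.h>

    int main(void) {
      size_t csize = (size_t)1 << 40;       // a value that does not fit in 32 bits
      printf("csize: 0x%zx\n", csize);      // correct: %zx matches size_t
      // printf("csize: 0x%x\n", csize);    // wrong: %x expects unsigned int
      return 0;
    }
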
From 8c9ccea2f576394b69d0489b6f8883b95c4ddc1a Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 10 Nov 2021 10:46:06 -0800
Subject: [PATCH 2/7] fix huge page madvise in case mmap failed

---
 src/os.c | 45 +++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/src/os.c b/src/os.c
index 26b8bcd2..77e8216e 100644
--- a/src/os.c
+++ b/src/os.c
@@ -510,7 +510,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
   #endif
       if (large_only) return p;
       if (p == NULL) {
-        mi_atomic_store_release(&large_page_try_ok, (uintptr_t)10);  // on error, don't try again for the next N allocations
+        mi_atomic_store_release(&large_page_try_ok, (uintptr_t)8);   // on error, don't try again for the next N allocations
      }
    }
  }
@@ -518,29 +518,30 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
   if (p == NULL) {
     *is_large = false;
     p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
-    #if defined(MADV_HUGEPAGE)
-    // Many Linux systems don't allow MAP_HUGETLB but they support instead
-    // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGEPAGE
-    // though since properly aligned allocations will already use large pages if available
-    // in that case -- in particular for our large regions (in `memory.c`).
-    // However, some systems only allow THP if called with explicit `madvise`, so
-    // when large OS pages are enabled for mimalloc, we call `madvise` anyways.
-    if (allow_large && use_large_os_page(size, try_alignment)) {
-      if (madvise(p, size, MADV_HUGEPAGE) == 0) {
-        *is_large = true; // possibly
-      };
-    }
-    #endif
-    #if defined(__sun)
-    if (allow_large && use_large_os_page(size, try_alignment)) {
-      struct memcntl_mha cmd = {0};
-      cmd.mha_pagesize = large_os_page_size;
-      cmd.mha_cmd = MHA_MAPSIZE_VA;
-      if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
-        *is_large = true;
+    if (p != NULL) {
+      #if defined(MADV_HUGEPAGE)
+      // Many Linux systems don't allow MAP_HUGETLB but they support instead
+      // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGEPAGE
+      // though since properly aligned allocations will already use large pages if available
+      // in that case -- in particular for our large regions (in `memory.c`).
+      // However, some systems only allow THP if called with explicit `madvise`, so
+      // when large OS pages are enabled for mimalloc, we call `madvise` anyways.
+      if (allow_large && use_large_os_page(size, try_alignment)) {
+        if (madvise(p, size, MADV_HUGEPAGE) == 0) {
+          *is_large = true; // possibly
+        };
+      }
+      #elif defined(__sun)
+      if (allow_large && use_large_os_page(size, try_alignment)) {
+        struct memcntl_mha cmd = {0};
+        cmd.mha_pagesize = large_os_page_size;
+        cmd.mha_cmd = MHA_MAPSIZE_VA;
+        if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
+          *is_large = true;
+        }
       }
+      #endif
     }
-    #endif
   }
   if (p == NULL) {
     _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large);
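The `if (p != NULL)` guard added above is the actual fix: previously, when `mi_unix_mmapx` failed, `madvise` (or `memcntl` on Solaris) was still called on the failed result. A sketch of the corrected pattern, independent of mimalloc's helpers (illustration only):

    #include <stddef.h>
    #include <sys/mman.h>

    // Advise the kernel about transparent huge pages strictly after the
    // mapping is known to have succeeded.
    static void* alloc_maybe_thp(size_t size) {
      void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) return NULL;   // never madvise a failed mapping
    #if defined(MADV_HUGEPAGE)
      madvise(p, size, MADV_HUGEPAGE);    // best effort; failure is harmless
    #endif
      return p;
    }

(`mi_unix_mmapx` converts `MAP_FAILED` to `NULL` before returning, which is why the patch tests `p != NULL` rather than `p != MAP_FAILED`.)
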
From 89abbe75d81ed4ed4858a3319a3ed938bc751ca1 Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 10 Nov 2021 11:23:11 -0800
Subject: [PATCH 3/7] improve aligned support on BSD and MAP_ALIGN systems

---
 src/os.c | 56 ++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/src/os.c b/src/os.c
index 77e8216e..8334771c 100644
--- a/src/os.c
+++ b/src/os.c
@@ -413,23 +413,41 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
 #else
 #define MI_OS_USE_MMAP
 static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
+  UNUSED(try_alignment);
   void* p = NULL;
+  #if defined(MAP_ALIGNED)  // BSD
+  if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
+    size_t n = mi_bsr(try_alignment);
+    if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
+      flags |= MAP_ALIGNED(n);
+      p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
+      if (p!=MAP_FAILED) return p;
+      // fall back to regular mmap
+    }
+  }
+  #elif defined(MAP_ALIGN)  // Solaris
+  if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
+    p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);
+    if (p!=MAP_FAILED) return p;
+    // fall back to regular mmap
+  }
+  #endif
   #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
   // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations
-  void* hint;
-  if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) {
-    p = mmap(hint,size,protect_flags,flags,fd,0);
-    if (p==MAP_FAILED) p = NULL; // fall back to regular mmap
+  if (addr == NULL) {
+    void* hint = mi_os_get_aligned_hint(try_alignment, size);
+    if (hint != NULL) {
+      p = mmap(hint, size, protect_flags, flags, fd, 0);
+      if (p!=MAP_FAILED) return p;
+      // fall back to regular mmap
+    }
   }
-  #else
-  UNUSED(try_alignment);
-  UNUSED(mi_os_get_aligned_hint);
   #endif
-  if (p==NULL) {
-    p = mmap(addr,size,protect_flags,flags,fd,0);
-    if (p==MAP_FAILED) p = NULL;
-  }
-  return p;
+  // regular mmap
+  p = mmap(addr, size, protect_flags, flags, fd, 0);
+  if (p!=MAP_FAILED) return p;
+  // failed to allocate
+  return NULL;
 }

 static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
@@ -444,24 +462,17 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
   int fd = -1;
   if (os_overcommit) {
     flags |= MAP_NORESERVE;
-  }
-  #if defined(MAP_ALIGNED)  // BSD
-  if (try_alignment > 0) {
-    size_t n = mi_bsr(try_alignment);
-    if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
-      flags |= MAP_ALIGNED(n);
-    }
-  }
-  #endif
+  }
   #if defined(PROT_MAX)
   protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
   #endif
   #if defined(VM_MAKE_TAG)
   // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
   int os_tag = (int)mi_option_get(mi_option_os_tag);
-  if (os_tag < 100 || os_tag > 255) os_tag = 100;
+  if (os_tag < 100 || os_tag > 255) { os_tag = 100; }
   fd = VM_MAKE_TAG(os_tag);
   #endif
+  // huge page allocation
   if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) {
     static _Atomic(uintptr_t) large_page_try_ok; // = 0;
     uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
@@ -515,6 +526,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
       }
     }
   }
+  // regular allocation
   if (p == NULL) {
     *is_large = false;
     p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);

From fd61997cef6af8eab86fc40da5636f95c6b6fb47 Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 10 Nov 2021 11:26:36 -0800
Subject: [PATCH 4/7] improve aligned support on BSD and MAP_ALIGN systems

---
 src/os.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/os.c b/src/os.c
index 8334771c..d43f9afd 100644
--- a/src/os.c
+++ b/src/os.c
@@ -564,7 +564,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro

 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 2TiB to 30TiB area to allocate them.
-#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
+#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || defined(MI_OS_USE_MMAP))
 static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;

 // Return a 4MiB aligned address that is probably available.
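Both patches hinge on how `MAP_ALIGNED(n)` on the BSDs encodes alignment: `n` is the base-2 logarithm of the requested alignment, which is why the code computes `n = mi_bsr(try_alignment)` (bit scan reverse) and then checks that `1 << n` reproduces `try_alignment` exactly, i.e. that it is a power of two between 4KiB (n = 12) and 1GiB (n = 30). An equivalent freestanding check, for illustration only:

    #include <stddef.h>

    // Returns log2(align) if align is a power of two, else -1.
    static int alignment_log2(size_t align) {
      int n = 0;
      while (((size_t)1 << n) < align && n < 63) n++;
      return (((size_t)1 << n) == align) ? n : -1;
    }

On Solaris the requested alignment is instead passed through the `addr` argument itself when `MAP_ALIGN` is set, hence the `(void*)try_alignment` cast in the first argument of `mmap`.
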
From f72e5688f5b6bbbe6a61289f0ed73541518de08e Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 10 Nov 2021 11:58:04 -0800
Subject: [PATCH 5/7] remove assign in while condition

---
 src/os.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/os.c b/src/os.c
index d43f9afd..426ead4a 100644
--- a/src/os.c
+++ b/src/os.c
@@ -303,20 +303,21 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size);
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
 #if (MI_INTPTR_SIZE >= 8)
   // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations
-  void* hint;
-  if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) {
-    void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
-    if (p != NULL) return p;
-    // for robustness always fall through in case of an error
-    /*
-    DWORD err = GetLastError();
-    if (err != ERROR_INVALID_ADDRESS &&   // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210)
-        err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230)
-      return NULL;
+  if (addr == NULL) {
+    void* hint = mi_os_get_aligned_hint(try_alignment,size);
+    if (hint != NULL) {
+      void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
+      if (p != NULL) return p;
+      // for robustness always fall through in case of an error
+      /*
+      DWORD err = GetLastError();
+      if (err != ERROR_INVALID_ADDRESS &&   // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210)
+          err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230)
+        return NULL;
+      }
+      */
+      _mi_warning_message("unable to allocate hinted aligned OS memory (%zu bytes, error code: %x, address: %p, alignment: %zu, flags: %x)\n", size, GetLastError(), hint, try_alignment, flags);
+    }
-    */
-    _mi_warning_message("unable to allocate hinted aligned OS memory (%zu bytes, error code: %x, address: %p, alignment: %d, flags: %x)\n", size, GetLastError(), hint, try_alignment, flags);
-    // fall through on error
   }
 #endif
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
@@ -414,20 +415,19 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
 #define MI_OS_USE_MMAP
 static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
   UNUSED(try_alignment);
-  void* p = NULL;
   #if defined(MAP_ALIGNED)  // BSD
   if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
     size_t n = mi_bsr(try_alignment);
     if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
       flags |= MAP_ALIGNED(n);
-      p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
+      void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
       if (p!=MAP_FAILED) return p;
       // fall back to regular mmap
     }
   }
   #elif defined(MAP_ALIGN)  // Solaris
   if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
-    p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);
+    void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);
     if (p!=MAP_FAILED) return p;
     // fall back to regular mmap
   }
@@ -437,14 +437,14 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr
   if (addr == NULL) {
     void* hint = mi_os_get_aligned_hint(try_alignment, size);
     if (hint != NULL) {
-      p = mmap(hint, size, protect_flags, flags, fd, 0);
+      void* p = mmap(hint, size, protect_flags, flags, fd, 0);
       if (p!=MAP_FAILED) return p;
       // fall back to regular mmap
     }
   }
   #endif
   // regular mmap
-  p = mmap(addr, size, protect_flags, flags, fd, 0);
+  void* p = mmap(addr, size, protect_flags, flags, fd, 0);
   if (p!=MAP_FAILED) return p;
   // failed to allocate
   return NULL;
 }
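The patch subject describes the pattern being removed: conditions like `if (addr == NULL && (hint = ...) != NULL)` become an explicit assignment plus a nested test, and the scope of `p` is narrowed to each early-return branch. The transformation in miniature, with a hypothetical `get_hint` helper standing in for `mi_os_get_aligned_hint`:

    #include <stddef.h>

    void* get_hint(size_t size);  // hypothetical helper, for illustration

    static void try_hinted(void* addr, size_t size) {
      // before: void* hint;
      //         if (addr == NULL && (hint = get_hint(size)) != NULL) { ... }
      // after, as in the patch:
      if (addr == NULL) {
        void* hint = get_hint(size);
        if (hint != NULL) {
          // ... attempt the hinted allocation first ...
        }
      }
    }

The behavior is identical; the rewrite just avoids assignments buried inside conditions.
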
From 8cc7d0c0195642f94cd9fc347e621d3652beeb9b Mon Sep 17 00:00:00 2001
From: daan
Date: Wed, 10 Nov 2021 16:29:53 -0800
Subject: [PATCH 6/7] increase segment size to 64MiB

---
 include/mimalloc-types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 82d74f7f..8d1e5149 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -106,7 +106,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Main tuning parameters for segment and page sizes
 // Sizes for 64-bit, divide by two for 32-bit
 #define MI_SEGMENT_SLICE_SHIFT            (13 + MI_INTPTR_SHIFT)         // 64KiB
-#define MI_SEGMENT_SHIFT                  ( 7 + MI_SEGMENT_SLICE_SHIFT)  // 8MiB
+#define MI_SEGMENT_SHIFT                  (10 + MI_SEGMENT_SLICE_SHIFT)  // 64MiB

 #define MI_SMALL_PAGE_SHIFT               (MI_SEGMENT_SLICE_SHIFT)       // 64KiB
 #define MI_MEDIUM_PAGE_SHIFT              ( 3 + MI_SMALL_PAGE_SHIFT)     // 512KiB
@@ -127,7 +127,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_MEDIUM_OBJ_SIZE_MAX            (MI_MEDIUM_PAGE_SIZE/4)   // 128KiB on 64-bit
 #define MI_MEDIUM_OBJ_WSIZE_MAX           (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
-#define MI_LARGE_OBJ_SIZE_MAX             (MI_SEGMENT_SIZE/2)       // 4MiB on 64-bit
+#define MI_LARGE_OBJ_SIZE_MAX             (MI_SEGMENT_SIZE/2)       // 32MiB on 64-bit
 #define MI_LARGE_OBJ_WSIZE_MAX            (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
 #define MI_HUGE_OBJ_SIZE_MAX              (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE)        // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
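The arithmetic behind the updated comments, on 64-bit where `MI_INTPTR_SHIFT` is 3: the slice shift is 13 + 3 = 16 (2^16 = 64KiB per slice); the segment shift goes from 7 + 16 = 23 (2^23 = 8MiB) to 10 + 16 = 26 (2^26 = 64MiB); and `MI_LARGE_OBJ_SIZE_MAX` becomes 2^26 / 2 = 32MiB. Restated as a compile-time check (C11; not part of the patch):

    #include <assert.h>
    #include <stddef.h>

    #define MI_INTPTR_SHIFT         3                              // 64-bit
    #define MI_SEGMENT_SLICE_SHIFT  (13 + MI_INTPTR_SHIFT)         // 16 -> 64KiB slices
    #define MI_SEGMENT_SHIFT        (10 + MI_SEGMENT_SLICE_SHIFT)  // 26 -> 64MiB segments
    #define MI_SEGMENT_SIZE         ((size_t)1 << MI_SEGMENT_SHIFT)

    static_assert(MI_SEGMENT_SIZE   == 64 * 1024 * 1024, "64MiB segments");
    static_assert(MI_SEGMENT_SIZE/2 == 32 * 1024 * 1024, "32MiB large-object max");
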
From 49d64dbc9571516dc8298f6bebc34ebf9d89afc8 Mon Sep 17 00:00:00 2001
From: daan
Date: Wed, 10 Nov 2021 16:30:21 -0800
Subject: [PATCH 7/7] save decommit_mask for segments in the segment cache

---
 include/mimalloc-internal.h |  4 +--
 src/options.c               |  4 +--
 src/segment-cache.c         | 18 ++++++++----
 src/segment.c               | 22 +++++++++++---
 test/main-override.cpp      | 57 +++++++++++++++++++++++++++++--------
 5 files changed, 79 insertions(+), 26 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 8b642c31..7ffa0023 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -85,8 +85,8 @@ void*      _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinn
 void       _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld);

 // "segment-cache.c"
-void*      _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
-bool       _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
+void*      _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+bool       _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, mi_commit_mask_t decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
 void       _mi_segment_map_allocated_at(const mi_segment_t* segment);
 void       _mi_segment_map_freed_at(const mi_segment_t* segment);

diff --git a/src/options.c b/src/options.c
index 5f2eedec..dbd4158c 100644
--- a/src/options.c
+++ b/src/options.c
@@ -89,8 +89,8 @@ static mi_option_desc_t options[_mi_option_last] =
   { 1, UNINIT, MI_OPTION(eager_commit_delay) },        // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
 #endif
   { 1, UNINIT, MI_OPTION(allow_decommit) },            // decommit slices when no longer used (after reset_delay milli-seconds)
-  { 500, UNINIT, MI_OPTION(reset_delay) },             // page reset delay in milli-seconds (= decommit)
-  { 1000, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments
+  { 100, UNINIT, MI_OPTION(reset_delay) },             // page reset delay in milli-seconds (= decommit)
+  { 500, UNINIT, MI_OPTION(segment_decommit_delay) },  // decommit delay in milli-seconds for freed segments
   { 0, UNINIT, MI_OPTION(use_numa_nodes) },            // 0 = use available numa nodes, otherwise use at most N nodes.
   { 0, UNINIT, MI_OPTION(limit_os_alloc) },            // 1 = do not use OS memory for allocation (but only reserved arenas)
   { 100, UNINIT, MI_OPTION(os_tag) },                  // only apple specific for now but might serve more or less related purpose

diff --git a/src/segment-cache.c b/src/segment-cache.c
index d7604502..6513204d 100644
--- a/src/segment-cache.c
+++ b/src/segment-cache.c
@@ -22,13 +22,14 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS)  // 1024 on 64-bit

 #define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX)
-#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET)
+#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET)  // note: update if MI_CACHE_FIELDS changes

 typedef struct mi_cache_slot_s {
   void*               p;
   size_t              memid;
   bool                is_pinned;
   mi_commit_mask_t    commit_mask;
+  mi_commit_mask_t    decommit_mask;
   _Atomic(mi_msecs_t) expire;
 } mi_cache_slot_t;

@@ -39,8 +40,10 @@ static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIEL
 static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS];  // zero bit = free

-mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
 {
+  if (_mi_preloading()) return NULL;
+
 #ifdef MI_CACHE_DISABLE
   return NULL;
 #else
@@ -76,11 +79,11 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm
   *memid = slot->memid;
   *is_pinned = slot->is_pinned;
   *is_zero = false;
-  mi_commit_mask_t cmask = slot->commit_mask; // copy
+  *commit_mask = slot->commit_mask;
+  *decommit_mask = slot->decommit_mask;
   slot->p = NULL;
   mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0);
-  *commit_mask = cmask;
-
+  
   // mark the slot as free again
   mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
   _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
@@ -140,6 +143,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
         // decommit committed parts
         // TODO: instead of decommit, we could also free to the OS?
         mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats);
+        slot->decommit_mask = mi_commit_mask_empty();
       }
       _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx);  // make it available again for a pop
     }
@@ -148,7 +152,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
   }
 }

-mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
+mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, mi_commit_mask_t decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
 {
 #ifdef MI_CACHE_DISABLE
   return false;
@@ -188,11 +192,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me
   slot->is_pinned = is_pinned;
   mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
   slot->commit_mask = commit_mask;
+  slot->decommit_mask = decommit_mask;
   if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) {
     long delay = mi_option_get(mi_option_segment_decommit_delay);
     if (delay == 0) {
       _mi_abandoned_await_readers(); // wait until safe to decommit
       mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats);
+      slot->decommit_mask = mi_commit_mask_empty();
     }
     else {
       mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay);
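With the new field in place, a cached segment round-trips both masks: `_mi_segment_cache_push` stores `commit_mask` and `decommit_mask` into the slot, and `_mi_segment_cache_pop` hands them back, so a reused segment remembers which committed parts were already scheduled for decommit. Whenever the slot is actually decommitted, the pending mask is cleared to stay consistent. Schematically (simplified from the patch; illustration only):

    // push, delay > 0: decommit later, in purge
    //   slot->expire = now + segment_decommit_delay;
    // purge, once now >= slot->expire:
    //   mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, stats);
    //   slot->decommit_mask = mi_commit_mask_empty();  // nothing pending anymore
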
diff --git a/src/segment.c b/src/segment.c
index 6ae3d9af..1533d281 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -256,7 +256,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
   // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);

   const size_t size = mi_segment_size(segment);
-  if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) {
+  if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) {
     const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size);
     if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize);
     _mi_abandoned_await_readers();  // wait until safe to free
@@ -650,12 +650,13 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
   bool is_zero = false;
   const bool commit_info_still_good = (segment != NULL);
   mi_commit_mask_t commit_mask = (segment != NULL ? segment->commit_mask : mi_commit_mask_empty());
+  mi_commit_mask_t decommit_mask = (segment != NULL ? segment->decommit_mask : mi_commit_mask_empty());
   if (segment==NULL) {
     // Allocate the segment from the OS
     bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
     bool is_pinned = false;
     size_t memid = 0;
-    segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld);
+    segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld);
     if (segment==NULL) {
       segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld);
       if (segment == NULL) return NULL;  // failed to allocate
@@ -691,9 +692,22 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
   if (!commit_info_still_good) {
     segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed
     segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large);
-    segment->decommit_expire = 0;
-    segment->decommit_mask = mi_commit_mask_empty();
+    if (segment->allow_decommit) {
+      segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay);
+      segment->decommit_mask = decommit_mask;
+      mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask));
+      #if MI_DEBUG>2
+      const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
+      mi_assert_internal(!mi_commit_mask_any_set(segment->decommit_mask, mi_commit_mask_create(0, commit_needed)));
+      #endif
+    }
+    else {
+      mi_assert_internal(mi_commit_mask_is_empty(decommit_mask));
+      segment->decommit_expire = 0;
+      segment->decommit_mask = mi_commit_mask_empty();
+    }
   }
+
   // initialize segment info
   segment->segment_slices = segment_slices;
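`mi_segment_init` now seeds a popped segment's `decommit_mask` from the cache and asserts the key invariant: every block pending decommit must still be committed, i.e. the decommit mask is a subset of the commit mask. For a single-word mask this subset test is the usual bit trick (illustration only; the real `mi_commit_mask_t` need not be a single word):

    #include <stdbool.h>
    #include <stdint.h>

    // true iff every bit set in sub is also set in mask
    static bool mask_all_set(uintptr_t mask, uintptr_t sub) {
      return (mask & sub) == sub;
    }
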
diff --git a/test/main-override.cpp b/test/main-override.cpp
index 32011c67..37734d37 100644
--- a/test/main-override.cpp
+++ b/test/main-override.cpp
@@ -35,22 +35,24 @@ static void test_mt_shutdown();
 static void large_alloc(void);   // issue #363
 static void fail_aslr();         // issue #372
 static void tsan_numa_test();    // issue #414
-static void strdup_test();       // issue #445
+static void strdup_test();       // issue #445
+static void bench_alloc_large(void); // issue #xxx

 int main() {
   mi_stats_reset();  // ignore earlier allocations
-
-  heap_thread_free_large();
-  heap_no_delete();
-  heap_late_free();
-  padding_shrink();
-  various_tests();
-  large_alloc();
-  tsan_numa_test();
-  strdup_test();
+
+  heap_thread_free_large();
+  heap_no_delete();
+  heap_late_free();
+  padding_shrink();
+  various_tests();
+  large_alloc();
+  tsan_numa_test();
+  strdup_test();

   //test_mt_shutdown();
   //fail_aslr();
+  //bench_alloc_large();
   mi_stats_print(NULL);
   return 0;
 }
@@ -246,11 +248,42 @@ static void fail_aslr() {
 // issues #414
 static void dummy_worker() {
   void* p = mi_malloc(0);
-  mi_free(p);
+  mi_free(p);
 }

 static void tsan_numa_test() {
   auto t1 = std::thread(dummy_worker);
   dummy_worker();
   t1.join();
-}
\ No newline at end of file
+}
+
+// issue #?
+#include <chrono>
+#include <memory>
+#include <random>
+
+static void bench_alloc_large(void) {
+  static constexpr int kNumBuffers = 20;
+  static constexpr size_t kMinBufferSize = 5 * 1024 * 1024;
+  static constexpr size_t kMaxBufferSize = 25 * 1024 * 1024;
+  std::unique_ptr<char[]> buffers[kNumBuffers];
+
+  std::random_device rd;
+  std::mt19937 gen(42); //rd());
+  std::uniform_int_distribution<> size_distribution(kMinBufferSize, kMaxBufferSize);
+  std::uniform_int_distribution<> buf_number_distribution(0, kNumBuffers - 1);
+
+  static constexpr int kNumIterations = 2000;
+
+  const auto start = std::chrono::steady_clock::now();
+  for (int i = 0; i < kNumIterations; ++i) {
+    int buffer_idx = buf_number_distribution(gen);
+    size_t new_size = size_distribution(gen);
+    buffers[buffer_idx] = std::make_unique<char[]>(new_size);
+  }
+  const auto end = std::chrono::steady_clock::now();
+  const auto num_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+  const auto us_per_allocation = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / kNumIterations;
+  std::cout << kNumIterations << " allocations Done in " << num_ms << "ms." << std::endl;
+  std::cout << "Avg " << us_per_allocation << " us per allocation" << std::endl;
+}