diff --git a/ide/vs2022/mimalloc-lib.vcxproj b/ide/vs2022/mimalloc-lib.vcxproj index 61e1ff34..c3eb5ca2 100644 --- a/ide/vs2022/mimalloc-lib.vcxproj +++ b/ide/vs2022/mimalloc-lib.vcxproj @@ -315,7 +315,7 @@ CompileAsCpp true stdcpp20 - <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet> /Zc:__cplusplus %(AdditionalOptions) diff --git a/include/mimalloc-stats.h b/include/mimalloc-stats.h index 44c4886f..631f43bb 100644 --- a/include/mimalloc-stats.h +++ b/include/mimalloc-stats.h @@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file #include <mimalloc.h> #include <stdint.h> -#define MI_STAT_VERSION 1 // increased on every backward incompatible change +#define MI_STAT_VERSION 2 // increased on every backward incompatible change // count allocation over time typedef struct mi_stat_count_s { @@ -65,6 +65,17 @@ typedef struct mi_stat_counter_s { MI_STAT_COUNTER(pages_unabandon_busy_wait) \ +// Size bins for chunks +typedef enum mi_chunkbin_e { + MI_CBIN_SMALL, // slice_count == 1 + MI_CBIN_OTHER, // slice_count: any size that fits none of the other bins, 1 <= slice_count <= MI_BCHUNK_BITS + MI_CBIN_MEDIUM, // slice_count == 8 + MI_CBIN_LARGE, // slice_count == MI_SIZE_BITS (only used if MI_ENABLE_LARGE_PAGES is 1) + MI_CBIN_NONE, // no bin assigned yet (the chunk is completely free) + MI_CBIN_COUNT +} mi_chunkbin_t; + + // Define the statistics structure #define MI_BIN_HUGE (73U) // see types.h #define MI_STAT_COUNT(stat) mi_stat_count_t stat; @@ -83,18 +94,21 @@ typedef struct mi_stats_s // size segregated statistics mi_stat_count_t malloc_bins[MI_BIN_HUGE+1]; // allocation per size bin mi_stat_count_t page_bins[MI_BIN_HUGE+1]; // pages allocated per size bin + mi_stat_count_t chunk_bins[MI_CBIN_COUNT]; // chunks per size bin } mi_stats_t; #undef MI_STAT_COUNT #undef MI_STAT_COUNTER + // Exported definitions #ifdef __cplusplus extern "C" { #endif -mi_decl_export void mi_stats_get( size_t stats_size, mi_stats_t* stats ) mi_attr_noexcept; -mi_decl_export char* mi_stats_get_json( size_t buf_size, char* buf ) mi_attr_noexcept; // use mi_free to free the result if the input buf == NULL +mi_decl_export void mi_stats_get( size_t stats_size, mi_stats_t* stats ) mi_attr_noexcept; +mi_decl_export char* mi_stats_get_json( size_t buf_size, char* buf ) mi_attr_noexcept; // use mi_free to free the result if the input buf == NULL +mi_decl_export size_t mi_stats_get_bin_size(size_t bin) mi_attr_noexcept; #ifdef __cplusplus } diff --git a/include/mimalloc.h b/include/mimalloc.h index 5b0445bf..3be9f619 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -264,27 +264,28 @@ typedef struct mi_heap_area_s { typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg); -mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); +mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); // Advanced mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, 
size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; -mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; -mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned /* cannot decommit/reset? */, bool is_zero, int numa_node) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned /* cannot decommit/reset? */, bool is_zero, int numa_node) mi_attr_noexcept; -mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; -mi_decl_export void mi_arenas_print(void) mi_attr_noexcept; +mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; +mi_decl_export void mi_arenas_print(void) mi_attr_noexcept; +mi_decl_export size_t mi_arena_min_alignment(void); // Advanced: heaps associated with specific memory arena's typedef void* mi_arena_id_t; -mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); -mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); +mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; #if MI_MALLOC_VERSION >= 182 // Create a heap that only allocates in the specified arena @@ -329,11 +330,16 @@ mi_decl_export void mi_collect_reduce(size_t target_thread_owned) mi_attr_noexce // experimental +typedef bool (mi_cdecl mi_commit_fun_t)(bool commit, void* start, size_t size, bool* is_zero, void* user_arg); +mi_decl_export bool mi_manage_memory(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, + mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id) mi_attr_noexcept; + mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size); -mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id); +mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id); mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena); mi_decl_export void mi_heap_unload(mi_heap_t* heap); + // Is a pointer contained in the given arena area? 
mi_decl_export bool mi_arena_contains(mi_arena_id_t arena_id, const void* p); diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 55ea3781..f91d5f4a 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -271,6 +271,14 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int6 return current; #endif } + +static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) { + const int64_t add = *padd; + if (add != 0) { + mi_atomic_addi64_relaxed((volatile _Atomic(int64_t)*)p, add); + } +} + static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index 2debaf25..1814853a 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -285,7 +285,6 @@ static inline size_t mi_clz(size_t x) { // return false if `x==0` (with `*idx` undefined) and true otherwise, // with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). static inline bool mi_bsf(size_t x, size_t* idx) { - // we don't optimize anymore to lzcnt so we run correctly on older cpu's as well #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9) // on x64 the carry flag is set on zero which gives better codegen bool is_zero; diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 8a880b8d..a51cd22f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -150,14 +150,14 @@ bool _mi_os_decommit(void* addr, size_t size); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); -bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stats_size); +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stats_size, mi_commit_fun_t* commit_fun, void* commit_fun_arg); bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); size_t _mi_os_secure_guard_page_size(void); -bool _mi_os_secure_guard_page_set_at(void* addr, bool is_pinned); -bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned); -bool _mi_os_secure_guard_page_reset_at(void* addr); -bool _mi_os_secure_guard_page_reset_before(void* addr); +bool _mi_os_secure_guard_page_set_at(void* addr, mi_memid_t memid); +bool _mi_os_secure_guard_page_set_before(void* addr, mi_memid_t memid); +bool _mi_os_secure_guard_page_reset_at(void* addr, mi_memid_t memid); +bool _mi_os_secure_guard_page_reset_before(void* addr, mi_memid_t memid); int _mi_os_numa_node(void); int _mi_os_numa_node_count(void); @@ -258,7 +258,7 @@ bool _mi_page_is_valid(mi_page_t* page); /* ----------------------------------------------------------- - Assertions + Assertions ----------------------------------------------------------- */ #if (MI_DEBUG) @@ -281,6 +281,46 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line #define mi_assert_expensive(x) #endif +/* ----------------------------------------------------------- + Statistics (in `stats.c`) +----------------------------------------------------------- */ + +// add to stat keeping track of the peak +void __mi_stat_increase(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount); + +// adjust stat in 
special cases to compensate for double counting (these do not adjust peak values and can decrease the total) +void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount); +void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount); +void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount); + +// counters can just be increased +void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); +void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount); + +#define mi_subproc_stat_counter_increase(subproc,stat,amount) __mi_stat_counter_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_increase(subproc,stat,amount) __mi_stat_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_decrease(subproc,stat,amount) __mi_stat_decrease_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_adjust_increase(subproc,stat,amnt) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amnt) +#define mi_subproc_stat_adjust_decrease(subproc,stat,amnt) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amnt) + +#define mi_tld_stat_counter_increase(tld,stat,amount) __mi_stat_counter_increase( &(tld)->stats.stat, amount) +#define mi_tld_stat_increase(tld,stat,amount) __mi_stat_increase( &(tld)->stats.stat, amount) +#define mi_tld_stat_decrease(tld,stat,amount) __mi_stat_decrease( &(tld)->stats.stat, amount) +#define mi_tld_stat_adjust_increase(tld,stat,amnt) __mi_stat_adjust_increase( &(tld)->stats.stat, amnt) +#define mi_tld_stat_adjust_decrease(tld,stat,amnt) __mi_stat_adjust_decrease( &(tld)->stats.stat, amnt) + +#define mi_os_stat_counter_increase(stat,amount) mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount) + +#define mi_heap_stat_counter_increase(heap,stat,amount) mi_tld_stat_counter_increase(heap->tld, stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) mi_tld_stat_increase( heap->tld, stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) mi_tld_stat_decrease( heap->tld, stat, amount) + /* ----------------------------------------------------------- Inlined definitions @@ -299,7 +339,9 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line #define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) #define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) + #define MI_INIT74(x) MI_INIT64(x),MI_INIT8(x),x(),x() +#define MI_INIT5(x) MI_INIT4(x),x() #include <string.h> // initialize a local variable to zero; use memset as compilers optimize constant sized memset's diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 2f76cfe6..0a60517c 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -22,6 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file #include <mimalloc-stats.h> #include <stddef.h> // ptrdiff_t #include <stdint.h> // uintptr_t, uint16_t, etc +#include <limits.h> // SIZE_MAX etc. #include <errno.h> // error codes #include "bits.h" // size defines (MI_INTPTR_SIZE etc), bit operations #include "atomic.h" // _Atomic primitives @@ -75,6 +76,15 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #endif +// Statistics (0=only essential, 1=normal, 2=more fine-grained (expensive) tracking) +#ifndef MI_STAT +#if (MI_DEBUG>0) +#define MI_STAT 2 +#else +#define MI_STAT 0 +#endif +#endif + // Use guard pages behind objects of a certain size (set by the MIMALLOC_DEBUG_GUARDED_MIN/MAX options) // Padding should be disabled when using guard pages // #define MI_GUARDED 1 @@ -111,16 +121,26 @@ terms of the MIT license. A copy of the license can be found in the file // (comments specify sizes on 64-bit, usually 32-bit is halved) // -------------------------------------------------------------- -// Sizes are for 64-bit +// Main size parameter; determines max arena sizes and max arena object sizes etc. #ifndef MI_ARENA_SLICE_SHIFT -#ifdef MI_SMALL_PAGE_SHIFT // backward compatibility -#define MI_ARENA_SLICE_SHIFT MI_SMALL_PAGE_SHIFT -#else -#define MI_ARENA_SLICE_SHIFT (13 + MI_SIZE_SHIFT) // 64 KiB (32 KiB on 32-bit) + #ifdef MI_SMALL_PAGE_SHIFT // backward compatibility + #define MI_ARENA_SLICE_SHIFT MI_SMALL_PAGE_SHIFT + #else + #define MI_ARENA_SLICE_SHIFT (13 + MI_SIZE_SHIFT) // 64 KiB (32 KiB on 32-bit) + #endif #endif +#if MI_ARENA_SLICE_SHIFT < 12 +#error Arena slices should be at least 4KiB #endif + #ifndef MI_BCHUNK_BITS_SHIFT -#define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512) + #if MI_ARENA_SLICE_SHIFT <= 13 // <= 8KiB + #define MI_BCHUNK_BITS_SHIFT (7) // 128 bits + #elif MI_ARENA_SLICE_SHIFT < 16 // <= 32KiB + #define MI_BCHUNK_BITS_SHIFT (8) // 256 bits + #else + #define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // 512 bits (or 256 on 32-bit) + #endif #endif #define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) // sub-bitmaps are "bchunks" of 512 bits @@ -133,6 +153,10 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE) #define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE) +#if MI_ARENA_MAX_OBJ_SIZE < MI_SIZE_SIZE*1024 +#error maximum object size may be too small to hold local thread data +#endif + #define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE // 64 KiB #define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bchunk bitmap) #define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bchunk bitmap) @@ -151,6 +175,7 @@ terms of the MIT license. A copy of the license can be found in the file // Minimal commit for a page on-demand commit (should be >= OS page size) #define MI_PAGE_MIN_COMMIT_SIZE MI_ARENA_SLICE_SIZE // (4*MI_KiB) + // ------------------------------------------------------ // Arena's are large reserved areas of memory allocated from // the OS that are managed by mimalloc to efficiently @@ -158,8 +183,8 @@ terms of the MIT license. A copy of the license can be found in the file // mimalloc pages. // ------------------------------------------------------ -// A large memory arena where pages are allocated in. -typedef struct mi_arena_s mi_arena_t; // defined in `arena.c` +// A large memory arena where pages are allocated in. +typedef struct mi_arena_s mi_arena_t; // defined below // --------------------------------------------------------------- @@ -227,6 +252,11 @@ static inline bool mi_memid_needs_no_free(mi_memid_t memid) { return mi_memkind_needs_no_free(memid.memkind); } +static inline mi_arena_t* mi_memid_arena(mi_memid_t memid) { + return (memid.memkind == MI_MEM_ARENA ? 
memid.mem.arena.arena : NULL); +} + + // ------------------------------------------------------ // Mimalloc pages contain allocated blocks // ------------------------------------------------------ @@ -385,7 +415,7 @@ typedef enum mi_page_kind_e { // ------------------------------------------------------ // Thread local data -typedef struct mi_tld_s mi_tld_t; +typedef struct mi_tld_s mi_tld_t; // defined below // Pages of a certain block size are held in a queue. typedef struct mi_page_queue_s { @@ -451,9 +481,11 @@ struct mi_heap_s { // ------------------------------------------------------ -// Sub processes do not reclaim or visit segments -// from other sub processes. These are essentially the -// static variables of a process. +// Sub processes do not reclaim or visit pages from other sub processes. +// These are essentially the static variables of a process, and +// usually there is only one subprocess. This can be used for example +// by CPython to have separate interpreters within one process. +// Each thread can only belong to one subprocess. // ------------------------------------------------------ #define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`) @@ -498,6 +530,50 @@ struct mi_tld_s { }; +/* ---------------------------------------------------------------------------- + Arenas are fixed areas of OS memory from which we can allocate + large blocks (>= MI_ARENA_MIN_BLOCK_SIZE). + In contrast to the rest of mimalloc, the arenas are shared between + threads and need to be accessed using atomic operations (using atomic `mi_bitmap_t`'s). + + Arenas are also used for huge OS page (1GiB) reservations, or for reserving + OS memory upfront, which can improve performance and is sometimes needed + on embedded devices. We can also employ this with WASI or `sbrk` systems + to reserve large arenas upfront and be able to reuse the memory more effectively. +-----------------------------------------------------------------------------*/ + +#define MI_ARENA_BIN_COUNT (MI_BIN_COUNT) +#define MI_ARENA_MIN_SIZE (MI_BCHUNK_BITS * MI_ARENA_SLICE_SIZE) // 32 MiB (or 8 MiB on 32-bit) +#define MI_ARENA_MAX_SIZE (MI_BITMAP_MAX_BIT_COUNT * MI_ARENA_SLICE_SIZE) + +typedef struct mi_bitmap_s mi_bitmap_t; // atomic bitmap (defined in `src/bitmap.h`) +typedef struct mi_bbitmap_s mi_bbitmap_t; // atomic binned bitmap (defined in `src/bitmap.h`) + +// A memory arena +typedef struct mi_arena_s { + mi_memid_t memid; // provenance of the memory area + mi_subproc_t* subproc; // subprocess this arena belongs to (`this 'element-of' this->subproc->arenas`) + + size_t slice_count; // total size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`) + size_t info_slices; // initial slices reserved for the arena bitmaps + int numa_node; // associated NUMA node + bool is_exclusive; // only allow allocations if specifically for this arena + _Atomic(mi_msecs_t) purge_expire; // expiration time when slices can be purged from `slices_purge`. + mi_commit_fun_t* commit_fun; // optional custom commit/decommit callback + void* commit_fun_arg; // user argument for a custom commit function + + mi_bbitmap_t* slices_free; // is the slice free? (a binned bitmap with size classes) + mi_bitmap_t* slices_committed; // is the slice committed? (i.e. accessible) + mi_bitmap_t* slices_dirty; // is the slice potentially non-zero? 
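+ // (note: slices in `slices_purge` remain committed until the delayed purge actually decommits or resets them)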
+ mi_bitmap_t* slices_purge; // slices that can be purged + mi_bitmap_t* pages; // all registered pages (abandoned and owned) + mi_bitmap_t* pages_abandoned[MI_ARENA_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page) + // the full queue contains abandoned full pages + // followed by the bitmaps (whose sizes depend on the arena size) + // note: when adding bitmaps revise `mi_arena_info_slices_needed` +} mi_arena_t; + + /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. @@ -521,10 +597,9 @@ struct mi_tld_s { #define EOVERFLOW (75) #endif - -// ------------------------------------------------------ -// Debug -// ------------------------------------------------------ +/* ----------------------------------------------------------- + Debug constants +----------------------------------------------------------- */ #if !defined(MI_DEBUG_UNINIT) #define MI_DEBUG_UNINIT (0xD0) @@ -536,93 +611,5 @@ struct mi_tld_s { #define MI_DEBUG_PADDING (0xDE) #endif -#if (MI_DEBUG) -// use our own assertion to print without memory allocation -void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); -#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) -#else -#define mi_assert(x) -#endif - -#if (MI_DEBUG>1) -#define mi_assert_internal mi_assert -#else -#define mi_assert_internal(x) -#endif - -#if (MI_DEBUG>2) -#define mi_assert_expensive mi_assert -#else -#define mi_assert_expensive(x) -#endif - - -// ------------------------------------------------------ -// Statistics -// ------------------------------------------------------ -#ifndef MI_STAT -#if (MI_DEBUG>0) -#define MI_STAT 2 -#else -#define MI_STAT 0 -#endif -#endif - - -// add to stat keeping track of the peak -void __mi_stat_increase(mi_stat_count_t* stat, size_t amount); -void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount); -void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount); -void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount); - -// adjust stat in special cases to compensate for double counting (and does not adjust peak values and can decrease the total) -void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount); -void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount); -void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount); -void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount); - -// counters can just be increased -void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); -void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount); - -#if (MI_STAT) -#define mi_debug_stat_increase(stat,amount) __mi_stat_increase( &(stat), amount) -#define mi_debug_stat_decrease(stat,amount) __mi_stat_decrease( &(stat), amount) -#define mi_debug_stat_counter_increase(stat,amount) __mi_stat_counter_increase( &(stat), amount) -#define mi_debug_stat_increase_mt(stat,amount) __mi_stat_increase_mt( &(stat), amount) -#define mi_debug_stat_decrease_mt(stat,amount) __mi_stat_decrease_mt( &(stat), amount) -#define mi_debug_stat_counter_increase_mt(stat,amount) __mi_stat_counter_increase_mt( &(stat), amount) -#else -#define mi_debug_stat_increase(stat,amount) ((void)0) -#define mi_debug_stat_decrease(stat,amount) ((void)0) -#define mi_debug_stat_counter_increase(stat,amount) ((void)0) -#define 
mi_debug_stat_increase_mt(stat,amount) ((void)0) -#define mi_debug_stat_decrease_mt(stat,amount) ((void)0) -#define mi_debug_stat_counter_increase_mt(stat,amount) ((void)0) -#endif - -#define mi_subproc_stat_counter_increase(subproc,stat,amount) __mi_stat_counter_increase_mt( &(subproc)->stats.stat, amount) -#define mi_subproc_stat_increase(subproc,stat,amount) __mi_stat_increase_mt( &(subproc)->stats.stat, amount) -#define mi_subproc_stat_decrease(subproc,stat,amount) __mi_stat_decrease_mt( &(subproc)->stats.stat, amount) -#define mi_subproc_stat_adjust_increase(subproc,stat,amnt) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amnt) -#define mi_subproc_stat_adjust_decrease(subproc,stat,amnt) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amnt) - -#define mi_tld_stat_counter_increase(tld,stat,amount) __mi_stat_counter_increase( &(tld)->stats.stat, amount) -#define mi_tld_stat_increase(tld,stat,amount) __mi_stat_increase( &(tld)->stats.stat, amount) -#define mi_tld_stat_decrease(tld,stat,amount) __mi_stat_decrease( &(tld)->stats.stat, amount) -#define mi_tld_stat_adjust_increase(tld,stat,amnt) __mi_stat_adjust_increase( &(tld)->stats.stat, amnt) -#define mi_tld_stat_adjust_decrease(tld,stat,amnt) __mi_stat_adjust_decrease( &(tld)->stats.stat, amnt) - -#define mi_os_stat_counter_increase(stat,amount) mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount) -#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount) -#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount) - -#define mi_heap_stat_counter_increase(heap,stat,amount) mi_tld_stat_counter_increase(heap->tld, stat, amount) -#define mi_heap_stat_increase(heap,stat,amount) mi_tld_stat_increase( heap->tld, stat, amount) -#define mi_heap_stat_decrease(heap,stat,amount) mi_tld_stat_decrease( heap->tld, stat, amount) - -#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_stat_counter_increase( (heap)->tld->stats.stat, amount) -#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_stat_increase( (heap)->tld->stats.stat, amount) -#define mi_debug_heap_stat_decrease(heap,stat,amount) mi_debug_stat_decrease( (heap)->tld->stats.stat, amount) #endif // MI_TYPES_H diff --git a/src/arena-meta.c b/src/arena-meta.c index a6cc965d..3b64ab9b 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -25,11 +25,16 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_META_PAGE_SIZE MI_ARENA_SLICE_SIZE #define MI_META_PAGE_ALIGN MI_ARENA_SLICE_ALIGN -#define MI_META_BLOCK_SIZE (128) // large enough such that META_MAX_SIZE >= 4k (even on 32-bit) +// large enough such that META_MAX_SIZE > 4k (even on 32-bit) +#define MI_META_BLOCK_SIZE (1 << (16 - MI_BCHUNK_BITS_SHIFT)) // 128 on 64-bit #define MI_META_BLOCK_ALIGN MI_META_BLOCK_SIZE #define MI_META_BLOCKS_PER_PAGE (MI_META_PAGE_SIZE / MI_META_BLOCK_SIZE) // 512 #define MI_META_MAX_SIZE (MI_BCHUNK_SIZE * MI_META_BLOCK_SIZE) +#if MI_META_MAX_SIZE <= 4096 +#error "max meta object size should be at least 4KiB" +#endif + typedef struct mi_meta_page_s { _Atomic(struct mi_meta_page_s*) next; // a linked list of meta-data pages (never released) mi_memid_t memid; // provenance of the meta-page memory itself @@ -77,8 +82,8 @@ static mi_meta_page_t* mi_meta_page_zalloc(void) { // guard pages #if MI_SECURE >= 1 - _mi_os_secure_guard_page_set_at(base, memid.is_pinned); - _mi_os_secure_guard_page_set_before(base + MI_META_PAGE_SIZE, memid.is_pinned); + _mi_os_secure_guard_page_set_at(base, memid); + _mi_os_secure_guard_page_set_before(base + MI_META_PAGE_SIZE, memid); #endif // initialize the page and free block bitmap diff --git a/src/arena.c b/src/arena.c index daf36411..cd7c8d2b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -24,37 +24,6 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo #include "bitmap.h" -/* ----------------------------------------------------------- - Arena allocation ------------------------------------------------------------ */ - -#define MI_ARENA_BIN_COUNT (MI_BIN_COUNT) -#define MI_ARENA_MIN_SIZE (MI_BCHUNK_BITS * MI_ARENA_SLICE_SIZE) // 32 MiB (or 8 MiB on 32-bit) -#define MI_ARENA_MAX_SIZE (MI_BITMAP_MAX_BIT_COUNT * MI_ARENA_SLICE_SIZE) - -// A memory arena descriptor -typedef struct mi_arena_s { - mi_memid_t memid; // memid of the memory area - mi_subproc_t* subproc; // subprocess this arena belongs to (`this 'in' this->subproc->arenas`) - - size_t slice_count; // total size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`) - size_t info_slices; // initial slices reserved for the arena bitmaps - int numa_node; // associated NUMA node - bool is_exclusive; // only allow allocations if specifically for this arena - _Atomic(mi_msecs_t) purge_expire; // expiration time when slices can be purged from `slices_purge`. - - mi_bbitmap_t* slices_free; // is the slice free? (a binned bitmap with size classes) - mi_bitmap_t* slices_committed; // is the slice committed? (i.e. accessible) - mi_bitmap_t* slices_dirty; // is the slice potentially non-zero? 
- mi_bitmap_t* slices_purge; // slices that can be purged - mi_bitmap_t* pages; // all registered pages (abandoned and owned) - mi_bitmap_t* pages_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page) - // the full queue contains abandoned full pages - // followed by the bitmaps (whose sizes depend on the arena size) - // note: when adding bitmaps revise `mi_arena_info_slices_needed` -} mi_arena_t; - - /* ----------------------------------------------------------- Arena id's ----------------------------------------------------------- */ @@ -103,6 +72,24 @@ static bool mi_arena_has_page(mi_arena_t* arena, mi_page_t* page) { } #endif +size_t mi_arena_min_alignment(void) { + return MI_ARENA_SLICE_ALIGN; +} + +static bool mi_arena_commit(mi_arena_t* arena, void* start, size_t size, bool* is_zero, size_t already_committed) { + if (arena != NULL && arena->commit_fun != NULL) { + return (*arena->commit_fun)(true, start, size, is_zero, arena->commit_fun_arg); + } + else if (already_committed > 0) { + return _mi_os_commit_ex(start, size, is_zero, already_committed); + } + else { + return _mi_os_commit(start, size, is_zero); + } +} + + + /* ----------------------------------------------------------- Util ----------------------------------------------------------- */ @@ -175,6 +162,7 @@ static size_t mi_page_full_size(mi_page_t* page) { } } + /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ @@ -211,7 +199,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count); // now actually commit bool commit_zero = false; - if (!_mi_os_commit_ex(p, mi_size_of_slices(slice_count), &commit_zero, mi_size_of_slices(slice_count - already_committed_count))) { + if (!mi_arena_commit(arena, p, mi_size_of_slices(slice_count), &commit_zero, mi_size_of_slices(slice_count - already_committed_count))) { memid->initially_committed = false; } else { @@ -623,7 +611,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice page = (mi_page_t*)mi_arena_os_alloc_aligned(alloc_size, page_alignment, 0 /* align offset */, commit, allow_large, req_arena, &memid); } } - + if (page == NULL) return NULL; mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment)); @@ -635,7 +623,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice mi_assert(alloc_size > _mi_os_secure_guard_page_size()); const size_t page_noguard_size = alloc_size - _mi_os_secure_guard_page_size(); if (memid.initially_committed) { - _mi_os_secure_guard_page_set_at((uint8_t*)page + page_noguard_size, memid.is_pinned); + _mi_os_secure_guard_page_set_at((uint8_t*)page + page_noguard_size, memid); } #endif @@ -689,7 +677,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice commit_size = _mi_align_up(block_start + block_size, MI_PAGE_MIN_COMMIT_SIZE); if (commit_size > page_noguard_size) { commit_size = page_noguard_size; } bool is_zero; - _mi_os_commit(page, commit_size, &is_zero); + mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0); if (!memid.initially_zero && !is_zero) { _mi_memzero_aligned(page, commit_size); } @@ -810,8 +798,7 @@ void _mi_arenas_page_free(mi_page_t* page) { size_t bin = _mi_bin(mi_page_block_size(page)); size_t 
slice_index; size_t slice_count; - mi_arena_t* arena = mi_page_arena(page, &slice_index, &slice_count); - + mi_arena_t* const arena = mi_page_arena(page, &slice_index, &slice_count); mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); mi_assert_internal(page->slice_committed > 0 || mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); mi_assert_internal(mi_bitmap_is_clearN(arena->pages_abandoned[bin], slice_index, 1)); @@ -826,14 +813,14 @@ void _mi_arenas_page_free(mi_page_t* page) { // we must do this since we may later allocate large spans over this page and cannot have a guard page in between #if MI_SECURE >= 2 if (!page->memid.is_pinned) { - _mi_os_secure_guard_page_reset_before((uint8_t*)page + mi_page_full_size(page)); + _mi_os_secure_guard_page_reset_before((uint8_t*)page + mi_page_full_size(page), page->memid); } #endif // unregister page _mi_page_map_unregister(page); if (page->memid.memkind == MI_MEM_ARENA) { - mi_arena_t* arena = page->memid.mem.arena.arena; + mi_arena_t* const arena = page->memid.mem.arena.arena; mi_bitmap_clear(arena->pages, page->memid.mem.arena.slice_index); if (page->slice_committed > 0) { // if committed on-demand, set the commit bits to account commit properly @@ -1160,7 +1147,8 @@ static mi_bbitmap_t* mi_arena_bbitmap_init(size_t slice_count, uint8_t** base) { } -static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, int numa_node, bool exclusive, + mi_memid_t memid, mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); mi_assert(start!=NULL); @@ -1190,17 +1178,29 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s _mi_warning_message("cannot use OS memory since it is not large enough (size %zu KiB, minimum required is %zu KiB)", size/MI_KiB, mi_size_of_slices(info_slices+1)/MI_KiB); return false; } + else if (info_slices >= MI_ARENA_MAX_OBJ_SLICES) { + _mi_warning_message("cannot use OS memory since it is too large with respect to the maximum object size (size %zu MiB, meta-info slices %zu, maximum object slices are %zu)", size/MI_MiB, info_slices, MI_ARENA_MAX_OBJ_SLICES); + return false; + } mi_arena_t* arena = (mi_arena_t*)start; // commit & zero if needed if (!memid.initially_committed) { - // leave a guard OS page decommitted at the end - _mi_os_commit(arena, mi_size_of_slices(info_slices) - _mi_os_secure_guard_page_size(), NULL); + size_t commit_size = mi_size_of_slices(info_slices); + // leave a guard OS page decommitted at the end? 
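+ // (pinned memory can never be decommitted, so the guard page is only carved out of the commit range when the memory is not pinned)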
+ if (!memid.is_pinned) { commit_size -= _mi_os_secure_guard_page_size(); } + if (commit_fun != NULL) { + (*commit_fun)(true /* commit */, arena, commit_size, NULL, commit_fun_arg); + } + else { + _mi_os_commit(arena, commit_size, NULL); + } } - else { + else if (!memid.is_pinned) { // if MI_SECURE, set a guard page at the end - _mi_os_secure_guard_page_set_before((uint8_t*)arena + mi_size_of_slices(info_slices), memid.is_pinned); + // todo: this does not respect the commit_fun as the memid is of external memory + _mi_os_secure_guard_page_set_before((uint8_t*)arena + mi_size_of_slices(info_slices), memid); } if (!memid.initially_zero) { _mi_memzero(arena, mi_size_of_slices(info_slices) - _mi_os_secure_guard_page_size()); @@ -1214,6 +1214,8 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s arena->info_slices = info_slices; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->purge_expire = 0; + arena->commit_fun = commit_fun; + arena->commit_fun_arg = commit_fun_arg; // mi_lock_init(&arena->abandoned_visit_lock); // init bitmaps @@ -1254,9 +1256,21 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is memid.initially_committed = is_committed; memid.initially_zero = is_zero; memid.is_pinned = is_pinned; - return mi_manage_os_memory_ex2(_mi_subproc(), start, size, numa_node, exclusive, memid, arena_id); + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, numa_node, exclusive, memid, NULL, NULL, arena_id); } +bool mi_manage_memory(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id) mi_attr_noexcept +{ + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.mem.os.base = start; + memid.mem.os.size = size; + memid.initially_committed = is_committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_pinned; + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, numa_node, exclusive, memid, commit_fun, commit_fun_arg, arena_id); +} + + // Reserve a range of regular OS memory static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); @@ -1264,7 +1278,7 @@ static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool com mi_memid_t memid; void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, &memid); if (start == NULL) return ENOMEM; - if (!mi_manage_os_memory_ex2(subproc, start, size, -1 /* numa node */, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(subproc, start, size, -1 /* numa node */, exclusive, memid, NULL, NULL, arena_id)) { _mi_os_free_ex(start, size, commit, memid); _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } @@ -1294,6 +1308,20 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe /* ----------------------------------------------------------- Debugging ----------------------------------------------------------- */ + +// Return idx of the slice past the last used slice +static size_t mi_arena_used_slices(mi_arena_t* arena) { + size_t idx; + if (mi_bitmap_bsr(arena->pages, &idx)) { + mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, idx); + const size_t page_slice_count = page->memid.mem.arena.slice_count; + 
return (idx + page_slice_count); + } + else { + return mi_arena_info_slices(arena); + } +} + static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf, size_t* k) { size_t bit_set_count = 0; for (int bit = 0; bit < MI_BFIELD_BITS; bit++) { @@ -1384,13 +1412,24 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k, return bit_set_count; } -static size_t mi_debug_show_chunks(const char* header1, const char* header2, const char* header3, size_t slice_count, size_t chunk_count, mi_bchunk_t* chunks, mi_bchunkmap_t* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) { +static size_t mi_debug_show_chunks(const char* header1, const char* header2, const char* header3, + size_t slice_count, size_t chunk_count, + mi_bchunk_t* chunks, mi_bchunkmap_t* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) +{ _mi_raw_message("\x1B[37m%s%s%s (use/commit: \x1B[31m0 - 25%%\x1B[33m - 50%%\x1B[36m - 75%%\x1B[32m - 100%%\x1B[0m)\n", header1, header2, header3); const size_t fields_per_line = (narrow ? 2 : 4); + const size_t used_slice_count = mi_arena_used_slices(arena); size_t bit_count = 0; size_t bit_set_count = 0; - for (size_t i = 0; i < chunk_count && bit_count < slice_count; i++) { + for (size_t i = 0; i < chunk_count && bit_count < slice_count; i++) { char buf[5*MI_BCHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf)); + if (bit_count > used_slice_count) { + const size_t diff = chunk_count - 1 - i; + bit_count += diff*MI_BCHUNK_BITS; + _mi_raw_message(" |\n"); + i = chunk_count-1; + } + size_t k = 0; mi_bchunk_t* chunk = &chunks[i]; @@ -1401,12 +1440,12 @@ static size_t mi_debug_show_chunks(const char* header1, const char* header2, con char chunk_kind = ' '; if (chunk_bins != NULL) { switch (mi_bbitmap_debug_get_bin(chunk_bins,i)) { - case MI_BBIN_SMALL: chunk_kind = 'S'; break; - case MI_BBIN_MEDIUM: chunk_kind = 'M'; break; - case MI_BBIN_LARGE: chunk_kind = 'L'; break; - case MI_BBIN_OTHER: chunk_kind = 'X'; break; + case MI_CBIN_SMALL: chunk_kind = 'S'; break; + case MI_CBIN_MEDIUM: chunk_kind = 'M'; break; + case MI_CBIN_LARGE: chunk_kind = 'L'; break; + case MI_CBIN_OTHER: chunk_kind = 'X'; break; default: chunk_kind = ' '; break; // suppress warning - // case MI_BBIN_NONE: chunk_kind = 'N'; break; + // case MI_CBIN_NONE: chunk_kind = 'N'; break; } } buf[k++] = chunk_kind; @@ -1509,7 +1548,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, numa_node, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, numa_node, exclusive, memid, NULL, NULL, arena_id)) { _mi_os_free(p, hsize, memid); return ENOMEM; } @@ -1583,7 +1622,7 @@ static bool mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_c size_t already_committed; mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed); // pretend all committed.. (as we lack a clearN call that counts the already set bits..) const bool all_committed = (already_committed == slice_count); - const bool needs_recommit = _mi_os_purge_ex(p, size, all_committed /* allow reset? */, mi_size_of_slices(already_committed)); + const bool needs_recommit = _mi_os_purge_ex(p, size, all_committed /* allow reset? 
*/, mi_size_of_slices(already_committed), arena->commit_fun, arena->commit_fun_arg); if (needs_recommit) { // no longer committed @@ -1684,7 +1723,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) if (!force && (expire == 0 || expire > now)) return false; // reset expire - mi_atomic_store_release(&arena->purge_expire, (mi_msecs_t)0); + mi_atomic_storei64_release(&arena->purge_expire, (mi_msecs_t)0); mi_subproc_stat_counter_increase(arena->subproc, arena_purges, 1); // go through all purge info's (with max MI_BFIELD_BITS ranges at a time) @@ -1706,7 +1745,7 @@ static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld) // check if any arena needs purging? mi_subproc_t* subproc = tld->subproc; const mi_msecs_t now = _mi_clock_now(); - const mi_msecs_t arenas_expire = mi_atomic_load_acquire(&subproc->purge_expire); + const mi_msecs_t arenas_expire = mi_atomic_loadi64_acquire(&subproc->purge_expire); if (!visit_all && !force && (arenas_expire == 0 || arenas_expire > now)) return; const size_t max_arena = mi_arenas_get_count(subproc); @@ -1717,7 +1756,7 @@ static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld) mi_atomic_guard(&purge_guard) { // increase global expire: at most one purge per delay cycle - if (arenas_expire > now) { mi_atomic_store_release(&subproc->purge_expire, now + (delay/10)); } + if (arenas_expire > now) { mi_atomic_storei64_release(&subproc->purge_expire, now + (delay/10)); } const size_t arena_start = tld->thread_seq % max_arena; size_t max_purge_count = (visit_all ? max_arena : (max_arena/4)+1); bool all_visited = true; @@ -1738,7 +1777,7 @@ static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld) } } if (all_visited && !any_purged) { - mi_atomic_store_release(&subproc->purge_expire, 0); + mi_atomic_storei64_release(&subproc->purge_expire, 0); } } } @@ -1843,30 +1882,22 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* } // find accessed size - size_t asize; - // scan the commit map for the highest entry - // scan the commit map for the highest entry - size_t idx; - //if (mi_bitmap_bsr(arena->slices_committed, &idx)) { - // asize = (idx + 1)* MI_ARENA_SLICE_SIZE; - //} - if (mi_bitmap_bsr(arena->pages, &idx)) { - mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, idx); - const size_t page_slice_count = page->memid.mem.arena.slice_count; - asize = mi_size_of_slices(idx + page_slice_count); - } - else { - asize = mi_arena_info_slices(arena) * MI_ARENA_SLICE_SIZE; - } + const size_t asize = mi_size_of_slices(mi_arena_used_slices(arena)); if (base != NULL) { *base = (void*)arena; } if (full_size != NULL) { *full_size = arena->memid.mem.os.size; } if (accessed_size != NULL) { *accessed_size = asize; } + // adjust abandoned page count + mi_subproc_t* const subproc = arena->subproc; + for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) { + const size_t count = mi_bitmap_popcount(arena->pages_abandoned[bin]); + if (count > 0) { mi_atomic_sub_acq_rel(&subproc->abandoned_count[bin], count); } + } + // unregister the pages _mi_page_map_unregister_range(arena, asize); - // set the entry to NULL - mi_subproc_t* subproc = arena->subproc; + // set arena entry to NULL const size_t count = mi_arenas_get_count(subproc); for(size_t i = 0; i < count; i++) { if (mi_arena_from_index(subproc, i) == arena) { @@ -1881,7 +1912,7 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* return true; } -mi_decl_export bool mi_arena_reload(void* start, 
size_t size, mi_arena_id_t* arena_id) { +mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id) { // assume the memory area is already containing the arena if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); } if (start == NULL || size == 0) return false; @@ -1904,12 +1935,22 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* are return false; } + // re-initialize arena->is_exclusive = true; + arena->commit_fun = commit_fun; + arena->commit_fun_arg = commit_fun_arg; arena->subproc = _mi_subproc(); if (!mi_arenas_add(arena->subproc, arena, arena_id)) { return false; } mi_arena_pages_reregister(arena); + + // adjust abandoned page count + for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) { + const size_t count = mi_bitmap_popcount(arena->pages_abandoned[bin]); + if (count > 0) { mi_atomic_add_acq_rel(&arena->subproc->abandoned_count[bin], count); } + } + return true; } diff --git a/src/bitmap.c b/src/bitmap.c index f7f94ddb..c07792d0 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -961,6 +961,16 @@ static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) { return false; } +static size_t mi_bchunk_popcount(mi_bchunk_t* chunk) { + size_t popcount = 0; + for (size_t i = 0; i < MI_BCHUNK_FIELDS; i++) { + const mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); + popcount += mi_bfield_popcount(b); + } + return popcount; +} + + /* -------------------------------------------------------------------------------- bitmap chunkmap -------------------------------------------------------------------------------- */ @@ -1284,6 +1294,25 @@ bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx) { return false; } +// Return count of all set bits in a bitmap. +size_t mi_bitmap_popcount(mi_bitmap_t* bitmap) { + // for all chunkmap entries + size_t popcount = 0; + const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); + for (size_t i = 0; i < chunkmap_max; i++) { + mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); + size_t cmap_idx; + // for each chunk (corresponding to a set bit in a chunkmap entry) + while (mi_bfield_foreach_bit(&cmap_entry, &cmap_idx)) { + const size_t chunk_idx = i*MI_BFIELD_BITS + cmap_idx; + // count bits in a chunk + popcount += mi_bchunk_popcount(&bitmap->chunks[chunk_idx]); + } + } + return popcount; +} + + // Clear a bit once it is set. 
void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx) { @@ -1373,7 +1402,7 @@ bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* vis size_t mi_bbitmap_size(size_t bit_count, size_t* pchunk_count) { - mi_assert_internal((bit_count % MI_BCHUNK_BITS) == 0); + // mi_assert_internal((bit_count % MI_BCHUNK_BITS) == 0); bit_count = _mi_align_up(bit_count, MI_BCHUNK_BITS); mi_assert_internal(bit_count <= MI_BITMAP_MAX_BIT_COUNT); mi_assert_internal(bit_count > 0); @@ -1411,25 +1440,27 @@ void mi_bbitmap_unsafe_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) { -------------------------------------------------------------------------------- */ // Assign a specific size bin to a chunk -static void mi_bbitmap_set_chunk_bin(mi_bbitmap_t* bbitmap, size_t chunk_idx, mi_bbin_t bin) { +static void mi_bbitmap_set_chunk_bin(mi_bbitmap_t* bbitmap, size_t chunk_idx, mi_chunkbin_t bin) { mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap)); - for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) { + for (mi_chunkbin_t ibin = MI_CBIN_SMALL; ibin < MI_CBIN_NONE; ibin = mi_chunkbin_inc(ibin)) { if (ibin == bin) { - mi_bchunk_set(& bbitmap->chunkmap_bins[ibin], chunk_idx, NULL); + const bool was_clear = mi_bchunk_set(& bbitmap->chunkmap_bins[ibin], chunk_idx, NULL); + if (was_clear) { mi_os_stat_increase(chunk_bins[ibin],1); } } else { - mi_bchunk_clear(&bbitmap->chunkmap_bins[ibin], chunk_idx, NULL); + const bool was_set = mi_bchunk_clear(&bbitmap->chunkmap_bins[ibin], chunk_idx, NULL); + if (was_set) { mi_os_stat_decrease(chunk_bins[ibin],1); } } } } -mi_bbin_t mi_bbitmap_debug_get_bin(const mi_bchunkmap_t* chunkmap_bins, size_t chunk_idx) { - for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) { +mi_chunkbin_t mi_bbitmap_debug_get_bin(const mi_bchunkmap_t* chunkmap_bins, size_t chunk_idx) { + for (mi_chunkbin_t ibin = MI_CBIN_SMALL; ibin < MI_CBIN_NONE; ibin = mi_chunkbin_inc(ibin)) { if (mi_bchunk_is_xsetN(MI_BIT_SET, &chunkmap_bins[ibin], chunk_idx, 1)) { return ibin; } } - return MI_BBIN_NONE; + return MI_CBIN_NONE; } // Track the index of the highest chunk that is accessed. 
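The chunk-bin accounting above feeds the new `chunk_bins` field exposed in mimalloc-stats.h under MI_STAT_VERSION 2. A minimal sketch of reading it through the exported `mi_stats_get` API follows; the `current` and `peak` members of `mi_stat_count_t` and the bin labels are illustrative assumptions (the struct body is not shown in this patch):

  #include <stdio.h>
  #include <mimalloc-stats.h>

  // print the chunk counts per size bin (one mi_stat_count_t per mi_chunkbin_t)
  static void print_chunk_bins(void) {
    mi_stats_t stats;
    mi_stats_get(sizeof(stats), &stats);  // fills (a prefix of) `stats`
    const char* name[MI_CBIN_COUNT] = { "small", "other", "medium", "large", "none" };
    for (size_t bin = 0; bin < MI_CBIN_COUNT; bin++) {
      printf("chunk bin %-6s: current %lld, peak %lld\n", name[bin],
             (long long)stats.chunk_bins[bin].current,
             (long long)stats.chunk_bins[bin].peak);
    }
  }
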
@@ -1446,7 +1477,7 @@ static void mi_bbitmap_chunkmap_set(mi_bbitmap_t* bbitmap, size_t chunk_idx, boo if (check_all_set) { if (mi_bchunk_all_are_set_relaxed(&bbitmap->chunks[chunk_idx])) { // all slices are free in this chunk: return back to the NONE bin - mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, MI_BBIN_NONE); + mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, MI_CBIN_NONE); } } mi_bchunk_set(&bbitmap->chunkmap, chunk_idx, NULL); @@ -1557,7 +1588,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap, mi_assert_internal(MI_BFIELD_BITS >= MI_BCHUNK_FIELDS); const mi_bfield_t cmap_mask = mi_bfield_mask(cmap_max_count,0); const size_t cmap_cycle = cmap_acc+1; - const mi_bbin_t bbin = mi_bbin_of(n); + const mi_chunkbin_t bbin = mi_chunkbin_of(n); // visit each cmap entry size_t cmap_idx = 0; mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X) @@ -1568,29 +1599,29 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap, if (cmap_entry == 0) continue; // get size bin masks - mi_bfield_t cmap_bins[MI_BBIN_COUNT] = { 0 }; - cmap_bins[MI_BBIN_NONE] = cmap_entry; - for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) { + mi_bfield_t cmap_bins[MI_CBIN_COUNT] = { 0 }; + cmap_bins[MI_CBIN_NONE] = cmap_entry; + for (mi_chunkbin_t ibin = MI_CBIN_SMALL; ibin < MI_CBIN_NONE; ibin = mi_chunkbin_inc(ibin)) { const mi_bfield_t cmap_bin = mi_atomic_load_relaxed(&bbitmap->chunkmap_bins[ibin].bfields[cmap_idx]); cmap_bins[ibin] = cmap_bin & cmap_entry; - cmap_bins[MI_BBIN_NONE] &= ~cmap_bin; // clear bits that are in an assigned size bin + cmap_bins[MI_CBIN_NONE] &= ~cmap_bin; // clear bits that are in an assigned size bin } // consider only chunks for a particular size bin at a time // this picks the best bin only within a cmap entry (~ 1GiB address space), but avoids multiple // iterations through all entries. - mi_assert_internal(bbin < MI_BBIN_NONE); - for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin <= MI_BBIN_NONE; + mi_assert_internal(bbin < MI_CBIN_NONE); + for (mi_chunkbin_t ibin = MI_CBIN_SMALL; ibin <= MI_CBIN_NONE; // skip from bbin to NONE (so, say, a SMALL will never be placed in a OTHER, MEDIUM, or LARGE chunk to reduce fragmentation) - ibin = (ibin == bbin ? MI_BBIN_NONE : mi_bbin_inc(ibin))) + ibin = (ibin == bbin ? 
MI_CBIN_NONE : mi_chunkbin_inc(ibin))) { - mi_assert_internal(ibin < MI_BBIN_COUNT); + mi_assert_internal(ibin < MI_CBIN_COUNT); const mi_bfield_t cmap_bin = cmap_bins[ibin]; size_t eidx = 0; mi_bfield_cycle_iterate(cmap_bin, tseq, cmap_entry_cycle, eidx, Y) { // assertion doesn't quite hold as the max_accessed may be out-of-date - // mi_assert_internal(cmap_entry_cycle > eidx || ibin == MI_BBIN_NONE); + // mi_assert_internal(cmap_entry_cycle > eidx || ibin == MI_CBIN_NONE); // get the chunk const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx; @@ -1598,7 +1629,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap, size_t cidx; if ((*on_find)(chunk, n, &cidx)) { - if (cidx==0 && ibin == MI_BBIN_NONE) { // only the first block determines the size bin + if (cidx==0 && ibin == MI_CBIN_NONE) { // only the first block determines the size bin // this chunk is now reserved for the `bbin` size class mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, bbin); } diff --git a/src/bitmap.h b/src/bitmap.h index 0237d005..45ae8fe5 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -71,8 +71,18 @@ typedef size_t mi_bfield_t; #define MI_BCHUNK_FIELDS (MI_BCHUNK_BITS / MI_BFIELD_BITS) // 8 on both 64- and 32-bit +// some compilers (msvc in C mode) cannot have expressions in the alignment attribute #if MI_BCHUNK_SIZE==64 +#define mi_decl_bchunk_align mi_decl_align(64) +#elif MI_BCHUNK_SIZE==32 +#define mi_decl_bchunk_align mi_decl_align(32) +#else +#define mi_decl_bchunk_align mi_decl_align(MI_BCHUNK_SIZE) +#endif + + // A bitmap chunk contains 512 bits on 64-bit (256 on 32-bit) -typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bchunk_s { +typedef mi_decl_bchunk_align struct mi_bchunk_s { _Atomic(mi_bfield_t) bfields[MI_BCHUNK_FIELDS]; } mi_bchunk_t; @@ -96,7 +106,7 @@ typedef mi_bchunk_t mi_bchunkmap_t; // An atomic bitmap -typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s { +typedef mi_decl_bchunk_align struct mi_bitmap_s { _Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS) size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1]; // suppress warning on msvc mi_bchunkmap_t chunkmap; @@ -196,6 +206,9 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx); // Used for unloading arena's bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx); +// Return count of all set bits in a bitmap. +size_t mi_bitmap_popcount(mi_bitmap_t* bitmap); + typedef bool (mi_forall_set_fun_t)(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg2); @@ -212,43 +225,36 @@ bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* vis much fragmentation since we keep chunks for larger blocks separate. ---------------------------------------------------------------------------- */ -// Size bins; larger bins are allowed to go into smaller bins. -// SMALL can only be in small (and NONE), so they cannot fragment the larger bins. 
-typedef enum mi_bbin_e { - MI_BBIN_SMALL, // slice_count == 1 - MI_BBIN_OTHER, // slice_count: any other from the other bins, and 1 <= slice_count <= MI_BCHUNK_BITS - MI_BBIN_MEDIUM, // slice_count == 8 - MI_BBIN_LARGE, // slice_count == MI_BFIELD_BITS -- only used if MI_ENABLE_LARGE_PAGES is 1 - MI_BBIN_NONE, // no bin assigned yet (the chunk is completely free) - MI_BBIN_COUNT -} mi_bbin_t; +// mi_chunkbin_t is defined in mimalloc-stats.h -static inline mi_bbin_t mi_bbin_inc(mi_bbin_t bbin) { - mi_assert_internal(bbin < MI_BBIN_COUNT); - return (mi_bbin_t)((int)bbin + 1); +static inline mi_chunkbin_t mi_chunkbin_inc(mi_chunkbin_t bbin) { + mi_assert_internal(bbin < MI_CBIN_COUNT); + return (mi_chunkbin_t)((int)bbin + 1); } -static inline mi_bbin_t mi_bbin_dec(mi_bbin_t bbin) { - mi_assert_internal(bbin > MI_BBIN_NONE); - return (mi_bbin_t)((int)bbin - 1); +static inline mi_chunkbin_t mi_chunkbin_dec(mi_chunkbin_t bbin) { + mi_assert_internal(bbin > MI_CBIN_NONE); + return (mi_chunkbin_t)((int)bbin - 1); } -static inline mi_bbin_t mi_bbin_of(size_t slice_count) { - if (slice_count==1) return MI_BBIN_SMALL; - if (slice_count==8) return MI_BBIN_MEDIUM; +static inline mi_chunkbin_t mi_chunkbin_of(size_t slice_count) { + if (slice_count==1) return MI_CBIN_SMALL; + if (slice_count==8) return MI_CBIN_MEDIUM; #if MI_ENABLE_LARGE_PAGES - if (slice_count==MI_BFIELD_BITS) return MI_BBIN_LARGE; + if (slice_count==MI_BFIELD_BITS) return MI_CBIN_LARGE; #endif - return MI_BBIN_OTHER; + return MI_CBIN_OTHER; } // An atomic "binned" bitmap for the free slices where we keep chunks reserved for particalar size classes -typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bbitmap_s { +typedef mi_decl_bchunk_align struct mi_bbitmap_s { _Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS) _Atomic(size_t) chunk_max_accessed; // max chunk index that was once cleared or set - size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc + #if (MI_BCHUNK_SIZE / MI_SIZE_SIZE) > 2 + size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc by aligning manually + #endif mi_bchunkmap_t chunkmap; - mi_bchunkmap_t chunkmap_bins[MI_BBIN_COUNT - 1]; // chunkmaps with bit set if the chunk is in that size class (excluding MI_BBIN_NONE) + mi_bchunkmap_t chunkmap_bins[MI_CBIN_COUNT - 1]; // chunkmaps with bit set if the chunk is in that size class (excluding MI_CBIN_NONE) mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT } mi_bbitmap_t; @@ -261,7 +267,7 @@ static inline size_t mi_bbitmap_max_bits(const mi_bbitmap_t* bbitmap) { return (mi_bbitmap_chunk_count(bbitmap) * MI_BCHUNK_BITS); } -mi_bbin_t mi_bbitmap_debug_get_bin(const mi_bchunk_t* chunkmap_bins, size_t chunk_idx); +mi_chunkbin_t mi_bbitmap_debug_get_bin(const mi_bchunk_t* chunkmap_bins, size_t chunk_idx); size_t mi_bbitmap_size(size_t bit_count, size_t* chunk_count); diff --git a/src/init.c b/src/init.c index 54905dc8..6d4ce65e 100644 --- a/src/init.c +++ b/src/init.c @@ -83,7 +83,8 @@ const mi_page_t _mi_page_empty = { { { 0 }, { 0 }, { 0 }, { 0 } }, \ \ { MI_INIT74(MI_STAT_COUNT_NULL) }, \ - { MI_INIT74(MI_STAT_COUNT_NULL) } + { MI_INIT74(MI_STAT_COUNT_NULL) }, \ + { MI_INIT5(MI_STAT_COUNT_NULL) } // -------------------------------------------------------- // Statically allocate an empty heap as the initial diff --git a/src/os.c b/src/os.c index 01ec2c46..bf7cca67 100644 --- a/src/os.c +++ b/src/os.c @@ -96,6 +96,11 @@ void* _mi_os_get_aligned_hint(size_t 
diff --git a/src/os.c b/src/os.c
index 01ec2c46..bf7cca67 100644
--- a/src/os.c
+++ b/src/os.c
@@ -96,6 +96,11 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
   return NULL;
 }

+
+/* -----------------------------------------------------------
+  Guard page allocation
+----------------------------------------------------------- */
+
 // In secure mode, return the size of a guard page, otherwise 0
 size_t _mi_os_secure_guard_page_size(void) {
   #if MI_SECURE > 0
@@ -104,42 +109,61 @@ size_t _mi_os_secure_guard_page_size(void) {
   return 0;
   #endif
 }
-
+
 // In secure mode, try to decommit an area and output a warning if this fails.
-bool _mi_os_secure_guard_page_set_at(void* addr, bool is_pinned) {
+bool _mi_os_secure_guard_page_set_at(void* addr, mi_memid_t memid) {
   if (addr == NULL) return true;
   #if MI_SECURE > 0
-  const bool ok = (is_pinned ? false : _mi_os_decommit(addr, _mi_os_secure_guard_page_size()));
+  bool ok = false;
+  if (!memid.is_pinned) {
+    mi_arena_t* const arena = mi_memid_arena(memid);
+    if (arena != NULL && arena->commit_fun != NULL) {
+      ok = (*(arena->commit_fun))(false /* decommit */, addr, _mi_os_secure_guard_page_size(), NULL, arena->commit_fun_arg);
+    }
+    else {
+      ok = _mi_os_decommit(addr, _mi_os_secure_guard_page_size());
+    }
+  }
   if (!ok) {
     _mi_error_message(EINVAL, "secure level %d, but failed to commit guard page (at %p of size %zu)\n", MI_SECURE, addr, _mi_os_secure_guard_page_size());
   }
   return ok;
   #else
-  MI_UNUSED(is_pinned);
+  MI_UNUSED(memid);
   return true;
   #endif
 }

 // In secure mode, try to decommit an area and output a warning if this fails.
-bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned) {
-  return _mi_os_secure_guard_page_set_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), is_pinned);
+bool _mi_os_secure_guard_page_set_before(void* addr, mi_memid_t memid) {
+  return _mi_os_secure_guard_page_set_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), memid);
 }

 // In secure mode, try to recommit an area
-bool _mi_os_secure_guard_page_reset_at(void* addr) {
+bool _mi_os_secure_guard_page_reset_at(void* addr, mi_memid_t memid) {
   if (addr == NULL) return true;
   #if MI_SECURE > 0
-  return _mi_os_commit(addr, _mi_os_secure_guard_page_size(), NULL);
+  if (!memid.is_pinned) {
+    mi_arena_t* const arena = mi_memid_arena(memid);
+    if (arena != NULL && arena->commit_fun != NULL) {
+      return (*(arena->commit_fun))(true, addr, _mi_os_secure_guard_page_size(), NULL, arena->commit_fun_arg);
+    }
+    else {
+      return _mi_os_commit(addr, _mi_os_secure_guard_page_size(), NULL);
+    }
+  }
   #else
-  return true;
+  MI_UNUSED(memid);
   #endif
+  return true;
 }

 // In secure mode, try to recommit an area
-bool _mi_os_secure_guard_page_reset_before(void* addr) {
-  return _mi_os_secure_guard_page_reset_at((uint8_t*)addr - _mi_os_secure_guard_page_size());
+bool _mi_os_secure_guard_page_reset_before(void* addr, mi_memid_t memid) {
+  return _mi_os_secure_guard_page_reset_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), memid);
 }

+
 /* -----------------------------------------------------------
   Free memory
 -------------------------------------------------------------- */
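/* ------------------------------------------------------------------------
   The guard-page helpers above now route (de)commit through an arena's
   commit_fun when one was installed via mi_manage_memory. A sketch of such
   a callback on POSIX, assuming mmap/mprotect semantics; the callback body
   is user code, not part of this patch:
------------------------------------------------------------------------ */
#include <sys/mman.h>
#include <stdbool.h>
#include <stddef.h>

/* Matches the mi_commit_fun_t signature:
   bool (bool commit, void* start, size_t size, bool* is_zero, void* user_arg) */
static bool my_commit_fun(bool commit, void* start, size_t size, bool* is_zero, void* user_arg) {
  (void)user_arg;
  if (commit) {
    if (mprotect(start, size, PROT_READ | PROT_WRITE) != 0) return false;
    if (is_zero != NULL) *is_zero = false;  // make no promise about page contents
    return true;
  }
  // decommit: drop the physical pages and make the range inaccessible
  if (madvise(start, size, MADV_DONTNEED) != 0) return false;
  return (mprotect(start, size, PROT_NONE) == 0);
}

/* Registered together with the memory itself, e.g.:
   mi_manage_memory(start, size, false /*committed*/, false /*pinned*/,
                    true /*zero*/, -1, true /*exclusive*/,
                    &my_commit_fun, NULL, &arena_id); */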
@@ -507,14 +531,18 @@ bool _mi_os_reset(void* addr, size_t size) {

 // either resets or decommits memory, returns true if the memory needs
 // to be recommitted if it is to be re-used later on.
-bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size)
+bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size, mi_commit_fun_t* commit_fun, void* commit_fun_arg)
 {
   if (mi_option_get(mi_option_purge_delay) < 0) return false;  // is purging allowed?
   mi_os_stat_counter_increase(purge_calls, 1);
   mi_os_stat_increase(purged, size);

-  if (mi_option_is_enabled(mi_option_purge_decommits) &&   // should decommit?
-      !_mi_preloading())                                   // don't decommit during preloading (unsafe)
+  if (commit_fun != NULL) {
+    bool decommitted = (*commit_fun)(false, p, size, NULL, commit_fun_arg);
+    return decommitted;  // needs_recommit?
+  }
+  else if (mi_option_is_enabled(mi_option_purge_decommits) &&   // should decommit?
+           !_mi_preloading())                                   // don't decommit during preloading (unsafe)
   {
     bool needs_recommit = true;
     mi_os_decommit_ex(p, size, &needs_recommit, stat_size);
@@ -531,7 +559,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size)
 // either resets or decommits memory, returns true if the memory needs
 // to be recommitted if it is to be re-used later on.
 bool _mi_os_purge(void* p, size_t size) {
-  return _mi_os_purge_ex(p, size, true, size);
+  return _mi_os_purge_ex(p, size, true, size, NULL, NULL);
 }
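/* ------------------------------------------------------------------------
   With a commit_fun installed, _mi_os_purge_ex above forwards the decommit
   to the callback and returns its result as the needs-recommit flag. A
   hedged sketch of the caller-side contract; the wrapper itself is
   illustrative only and uses the internal functions as changed here:
------------------------------------------------------------------------ */
static bool purge_then_reuse(void* p, size_t size,
                             mi_commit_fun_t* commit_fun, void* commit_fun_arg) {
  const bool needs_recommit =
    _mi_os_purge_ex(p, size, true /*allow_reset*/, size, commit_fun, commit_fun_arg);
  if (!needs_recommit) return true;   // memory was merely reset; still committed
  if (commit_fun != NULL) {
    bool is_zero = false;
    return (*commit_fun)(true /*commit*/, p, size, &is_zero, commit_fun_arg);
  }
  return _mi_os_commit(p, size, NULL);
}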
diff --git a/src/page-queue.c b/src/page-queue.c
index 4c30c970..3e2315cc 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -106,6 +106,7 @@ size_t _mi_bin(size_t size) {
 }

 size_t _mi_bin_size(size_t bin) {
+  mi_assert_internal(bin <= MI_BIN_HUGE);
   return _mi_heap_empty.pages[bin].block_size;
 }
diff --git a/src/page.c b/src/page.c
index b6f3bffe..50c978ff 100644
--- a/src/page.c
+++ b/src/page.c
@@ -429,7 +429,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
   if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) {  // not full or huge queue?
     if (pq->last==page && pq->first==page) { // the only page in the queue?
       mi_heap_t* heap = mi_page_heap(page);
-      mi_debug_heap_stat_counter_increase(heap, pages_retire, 1);
+      #if MI_STAT>0
+      mi_heap_stat_counter_increase(heap, pages_retire, 1);
+      #endif
       page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
       mi_assert_internal(pq >= heap->pages);
       const size_t index = pq - heap->pages;
@@ -618,7 +620,9 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
   size_t page_size;
   //uint8_t* page_start =
   mi_page_area(page, &page_size);
-  mi_debug_heap_stat_counter_increase(heap, pages_extended, 1);
+  #if MI_STAT>0
+  mi_heap_stat_counter_increase(heap, pages_extended, 1);
+  #endif

   // calculate the extend count
   const size_t bsize = mi_page_block_size(page);
@@ -658,7 +662,9 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
   }

   // enable the new free list
   page->capacity += (uint16_t)extend;
-  mi_debug_heap_stat_increase(heap, page_committed, extend * bsize);
+  #if MI_STAT>0
+  mi_heap_stat_increase(heap, page_committed, extend * bsize);
+  #endif
   mi_assert_expensive(mi_page_is_valid_init(page));
 }
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 88b520c8..574e5678 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -848,7 +848,7 @@ bool _mi_prim_thread_is_in_threadpool(void) {
   if (win_major_version >= 6) {
     // check if this thread belongs to a windows threadpool
     // see:
-    _TEB* const teb = NtCurrentTeb();
+    struct _TEB* const teb = NtCurrentTeb();
     void* const pool_data = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778)));
     return (pool_data != NULL);
   }
diff --git a/src/stats.c b/src/stats.c
index 27dc69d0..7a761d82 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -479,6 +479,11 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s
 // Return statistics
 // --------------------------------------------------------

+size_t mi_stats_get_bin_size(size_t bin) mi_attr_noexcept {
+  if (bin > MI_BIN_HUGE) return 0;
+  return _mi_bin_size(bin);
+}
+
 void mi_stats_get(size_t stats_size, mi_stats_t* stats) mi_attr_noexcept {
   if (stats == NULL || stats_size == 0) return;
   _mi_memzero(stats, stats_size);
@@ -529,7 +534,7 @@ static void mi_heap_buf_print(mi_heap_buf_t* hbuf, const char* msg) {
 }

 static void mi_heap_buf_print_count_bin(mi_heap_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, size_t bin, bool add_comma) {
-  const size_t binsize = _mi_bin_size(bin);
+  const size_t binsize = mi_stats_get_bin_size(bin);
   const size_t pagesize = (binsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_SMALL_PAGE_SIZE :
                           (binsize <= MI_MEDIUM_MAX_OBJ_SIZE ? MI_MEDIUM_PAGE_SIZE :
                           (binsize <= MI_LARGE_MAX_OBJ_SIZE ? MI_LARGE_PAGE_SIZE : 0)));
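/* ------------------------------------------------------------------------
   The new mi_stats_get_bin_size export lets consumers of mi_stats_t label
   the per-bin arrays without reaching into internal tables. For example
   (assuming the mi_stat_count_t layout of mimalloc-stats.h with a
   `current` field):
------------------------------------------------------------------------ */
#include <stdio.h>
#include <mimalloc.h>
#include <mimalloc-stats.h>

int main(void) {
  void* p = mi_malloc(100);  // populate at least one bin
  mi_stats_t stats;
  mi_stats_get(sizeof(stats), &stats);
  for (size_t bin = 0; mi_stats_get_bin_size(bin) != 0; bin++) {
    printf("bin %3zu: block size %8zu, current %lld\n",
           bin, mi_stats_get_bin_size(bin),
           (long long)stats.malloc_bins[bin].current);
  }
  mi_free(p);
  return 0;
}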