From 657135de36edad2082323426aea3e2fa1a9cf19a Mon Sep 17 00:00:00 2001
From: daanx
Date: Mon, 23 Dec 2024 09:53:52 -0800
Subject: [PATCH 1/5] commit the 2-level page-map on over-commit systems

---
 CMakeLists.txt              | 18 +++++++++++-------
 include/mimalloc/internal.h | 26 ++++++++++++--------------
 src/options.c               |  2 +-
 src/page-map.c              |  3 ++-
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 07a292e0..c184a0b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,10 +10,9 @@ option(MI_PADDING "Enable padding to detect heap block overflow (alway
 option(MI_OVERRIDE "Override the standard malloc interface (i.e. define entry points for 'malloc', 'free', etc)" ON)
 option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF)
 option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
-option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
-option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
-option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF)
+option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
 option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
+
 option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" ON)
 option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF)
 option(MI_SEE_ASM "Generate assembly files" OFF)
@@ -21,14 +20,19 @@ option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS"
 option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
 option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
 option(MI_LOCAL_DYNAMIC_TLS "Use local-dynamic-tls, a slightly slower but dlopen-compatible thread local storage mechanism (Unix)" OFF)
-option(MI_LIBC_MUSL "Set this when linking with musl libc" OFF)
+option(MI_LIBC_MUSL "Enable this when linking with musl libc" OFF)
+
+option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
+option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
+option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
+option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
+option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF)
+
 option(MI_BUILD_SHARED "Build shared library" ON)
 option(MI_BUILD_STATIC "Build static library" ON)
 option(MI_BUILD_OBJECT "Build object library" ON)
 option(MI_BUILD_TESTS "Build test executables" ON)
-option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
-option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
-option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
+
 option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)
 option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc."
OFF) option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e98a37f5..4cb54d6f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -435,13 +435,14 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si /* ----------------------------------------------------------- - Pages + The page map maps addresses to `mi_page_t` pointers ----------------------------------------------------------- */ #if MI_PAGE_MAP_FLAT -// flat page-map committed on demand +// flat page-map committed on demand, using one byte per slice (64 KiB). // single indirection and low commit, but large initial virtual reserve (4 GiB with 48 bit virtual addresses) +// used by default on <= 40 bit virtual address spaces. extern uint8_t* _mi_page_map; static inline size_t _mi_page_map_index(const void* p) { @@ -468,26 +469,23 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { #else // 2-level page map: -// double indirection but low commit and low virtual reserve. -// -// The page-map is usually 4 MiB and points to sub maps of 64 KiB. -// The page-map is committed on-demand (in 64 KiB) parts (and sub-maps are committed on-demand as well) -// One sub page-map = 64 KiB => covers 2^13 * 2^16 = 2^32 = 512 MiB address space -// The page-map needs 48-16-13 = 19 bits => 2^19 sub map pointers = 4 MiB size. -// (Choosing a MI_PAGE_MAP_SUB_SHIFT of 16 gives slightly better code but will commit the initial sub-map at 512 KiB) - +// double indirection, but low commit and low virtual reserve. +// +// the page-map is usually 4 MiB and points to sub maps of 64 KiB. +// the page-map is committed on-demand (in 64 KiB parts) (and sub-maps are committed on-demand as well) +// one sub page-map = 64 KiB => covers 2^(16-3) * 2^16 = 2^29 = 512 MiB address space +// the page-map needs 48-(16+13) = 19 bits => 2^19 sub map pointers = 4 MiB size. 
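Aside (not part of the patch): the arithmetic in the comment above can be checked with a small standalone C program. It assumes a 64-bit build with 48 address bits, 64 KiB arena slices, and the sub-map shift of 13 defined just below.

#include <stdint.h>
#include <stdio.h>

#define SLICE_SHIFT 16                              // one arena slice = 64 KiB
#define SUB_SHIFT   13                              // 2^13 page pointers per 64 KiB sub-map
#define SUB_COUNT   ((size_t)1 << SUB_SHIFT)
#define MAP_SHIFT   (48 - SLICE_SHIFT - SUB_SHIFT)  // 19 bits => 2^19 sub-map pointers = 4 MiB

int main(void) {
  const uintptr_t p = (uintptr_t)0x7f1234567890;    // an example address (64-bit build assumed)
  const size_t u   = (size_t)(p >> SLICE_SHIFT);    // slice number
  const size_t idx = u / SUB_COUNT;                 // index into the top-level map
  const size_t sub = u % SUB_COUNT;                 // index into the 64 KiB sub-map
  // one sub-map covers 2^13 slices * 64 KiB = 2^29 bytes = 512 MiB of address space
  printf("page = map[%zu][%zu] (top level: %zu pointers)\n", idx, sub, (size_t)1 << MAP_SHIFT);
  return 0;
}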
#define MI_PAGE_MAP_SUB_SHIFT (13) #define MI_PAGE_MAP_SUB_COUNT (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT) - #define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT) #define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT) extern mi_page_t*** _mi_page_map; static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) { - const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; - if (sub_idx != NULL) { *sub_idx = (uint32_t)u % MI_PAGE_MAP_SUB_COUNT; } - return (size_t)(u / MI_PAGE_MAP_SUB_COUNT); + const size_t u = (size_t)((uintptr_t)p / MI_ARENA_SLICE_SIZE); + if (sub_idx != NULL) { *sub_idx = u % MI_PAGE_MAP_SUB_COUNT; } + return (u / MI_PAGE_MAP_SUB_COUNT); } static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { diff --git a/src/options.c b/src/options.c index fc3a2838..7562cd46 100644 --- a/src/options.c +++ b/src/options.c @@ -103,7 +103,7 @@ typedef struct mi_option_desc_s { #endif #ifndef MI_DEFAULT_PAGEMAP_COMMIT -#if defined(__APPLE__) +#if defined(__APPLE__) // when overloading malloc, we still get mixed pointers sometimes on macOS; this avoids a bad access #define MI_DEFAULT_PAGEMAP_COMMIT 1 #else #define MI_DEFAULT_PAGEMAP_COMMIT 0 diff --git a/src/page-map.c b/src/page-map.c index 37ce3082..db14265b 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -187,7 +187,8 @@ bool _mi_page_map_init(void) { const size_t os_page_size = _mi_os_page_size(); const size_t page_map_size = _mi_align_up( page_map_count * sizeof(mi_page_t**), os_page_size); const size_t reserve_size = page_map_size + os_page_size; - const bool commit = page_map_size <= 64*MI_KiB || mi_option_is_enabled(mi_option_pagemap_commit); // _mi_os_has_overcommit(); // commit on-access on Linux systems? + const bool commit = page_map_size <= 64*MI_KiB || + mi_option_is_enabled(mi_option_pagemap_commit) || _mi_os_has_overcommit(); _mi_page_map = (mi_page_t***)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid); if (_mi_page_map==NULL) { _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB); From 88d8ee964f818b09ccd56c078b90851c78cd9af2 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 23 Dec 2024 15:04:06 -0800 Subject: [PATCH 2/5] remove is_large member (and use is_pinned for this) --- doc/mimalloc-doc.h | 7 +++---- include/mimalloc.h | 4 ++-- include/mimalloc/internal.h | 4 ++-- src/arena.c | 23 ++++++++++------------- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index e1c14b44..e9da9b90 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -431,12 +431,11 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large); /// @param start Start of the memory area /// @param size The size of the memory area. /// @param is_committed Is the area already committed? -/// @param is_large Does it consist of large OS pages? Set this to \a true as well for memory -/// that should not be decommitted or protected (like rdma etc.) +/// @param is_pinned Can the memory not be decommitted or reset? (usually the case for large OS pages) /// @param is_zero Does the area consists of zero's? /// @param numa_node Possible associated numa node or `-1`. /// @return \a true if successful, and \a false on error. 
-bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node); +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node); /// Reserve \a pages of huge OS pages (1GiB) evenly divided over \a numa_nodes nodes, /// but stops after at most `timeout_msecs` seconds. @@ -589,7 +588,7 @@ void mi_subproc_add_current_thread(mi_subproc_id_t subproc); /// Allocate \a size bytes aligned by \a alignment. /// @param size number of bytes to allocate. -/// @param alignment the minimal alignment of the allocated memory. +/// @param alignment the minimal alignment of the allocated memory. /// @returns pointer to the allocated memory or \a NULL if out of memory, /// or if the alignment is not a power of 2 (including 0). The \a size is unrestricted /// (and does not have to be an integral multiple of the \a alignment). diff --git a/include/mimalloc.h b/include/mimalloc.h index 8bff8923..508e6aec 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -274,7 +274,7 @@ mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; -mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned /* cannot decommit/reset? */, bool is_zero, int numa_node) mi_attr_noexcept; mi_decl_export void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept; @@ -283,7 +283,7 @@ typedef void* mi_arena_id_t; mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; #if MI_MALLOC_VERSION >= 182 // Create a heap that only allocates in the specified arena diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 4cb54d6f..281f531a 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -143,8 +143,8 @@ mi_arena_id_t _mi_arena_id_none(void); mi_arena_t* _mi_arena_from_id(mi_arena_id_t id); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena); -void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); -void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, 
mi_memid_t* memid); +void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); void _mi_arenas_free(void* p, size_t size, mi_memid_t memid); bool _mi_arenas_contain(const void* p); void _mi_arenas_collect(bool force_purge, mi_tld_t* tld); diff --git a/src/arena.c b/src/arena.c index 00ff3720..7b97fbbc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -41,7 +41,6 @@ typedef struct mi_arena_s { size_t info_slices; // initial slices reserved for the arena bitmaps int numa_node; // associated NUMA node bool is_exclusive; // only allow allocations if specifically for this arena - bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(mi_msecs_t) purge_expire; // expiration time when slices can be purged from `slices_purge`. mi_bitmap_t* slices_free; // is the slice free? @@ -333,8 +332,8 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_ Arena iteration ----------------------------------------------------------- */ -static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_large) { - if (!allow_large && arena->is_large) return false; +static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_pinned) { + if (!allow_pinned && arena->memid.is_pinned) return false; if (!mi_arena_id_is_suitable(arena, req_arena)) return false; if (req_arena == NULL) { // if not specific, check numa affinity const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); @@ -1104,7 +1103,7 @@ static mi_bitmap_t* mi_arena_bitmap_init(size_t slice_count, uint8_t** base) { } -static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_assert(!is_large || (memid.initially_committed && memid.is_pinned)); mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); @@ -1154,8 +1153,7 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s arena->is_exclusive = exclusive; arena->slice_count = slice_count; arena->info_slices = info_slices; - arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) - arena->is_large = is_large; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->purge_expire = 0; // mi_lock_init(&arena->abandoned_visit_lock); @@ -1190,14 +1188,14 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s } -bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); memid.mem.os.base = start; memid.mem.os.size = size; memid.initially_committed = is_committed; memid.initially_zero = is_zero; - memid.is_pinned = is_large; - return mi_manage_os_memory_ex2(_mi_subproc(), start, size, is_large, numa_node, exclusive, memid, arena_id); + memid.is_pinned = is_pinned; + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, numa_node, exclusive, memid, arena_id); } // Reserve a range of regular OS memory @@ -1207,13 +1205,12 @@ static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool com mi_memid_t memid; void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, &memid); if (start == NULL) return ENOMEM; - const bool is_large = memid.is_pinned; // todo: use separate is_large field? - if (!mi_manage_os_memory_ex2(subproc, start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(subproc, start, size, -1 /* numa node */, exclusive, memid, arena_id)) { _mi_os_free_ex(start, size, commit, memid); _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : ""); + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), memid.is_pinned ? 
" (in large os pages)" : ""); // mi_debug_show_arenas(true, true, false); return 0; @@ -1373,7 +1370,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, true, numa_node, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, numa_node, exclusive, memid, arena_id)) { _mi_os_free(p, hsize, memid); return ENOMEM; } From b515a0ad4c58f1e264213f22998c628470746bc1 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 23 Dec 2024 16:28:34 -0800 Subject: [PATCH 3/5] add _mi_os_guard_page_size --- include/mimalloc/internal.h | 8 ++++++ include/mimalloc/types.h | 7 +++-- src/arena-meta.c | 26 +++++++----------- src/arena.c | 38 +++++++++++--------------- src/os.c | 54 ++++++++++++++++++++++++++++++++++++- 5 files changed, 91 insertions(+), 42 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 281f531a..7c49d590 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -116,6 +116,7 @@ void _mi_os_free(void* p, size_t size, mi_memid_t memid); void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid); size_t _mi_os_page_size(void); +size_t _mi_os_guard_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); @@ -129,6 +130,13 @@ bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset); +size_t _mi_os_secure_guard_page_size(void); +bool _mi_os_secure_guard_page_set_at(void* addr, bool is_pinned); +bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned); +bool _mi_os_secure_guard_page_reset_at(void* addr); +bool _mi_os_secure_guard_page_reset_before(void* addr); + + void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 84179458..c2ce4a26 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -46,8 +46,12 @@ terms of the MIT license. A copy of the license can be found in the file // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). // #define MI_STAT 1 -// Define MI_SECURE to enable security mitigations. The lowest two have minimal performance impact: +// Define MI_SECURE to enable security mitigations. Level 1 has minimal performance impact, +// but protects most metadata with guard pages: // #define MI_SECURE 1 // guard page around metadata +// +// Level 2 has more performance impact but protect well against various buffer overflows +// by surrounding all mimalloc pages with guard pages: // #define MI_SECURE 2 // guard page around each mimalloc page (can fragment VMA's with large heaps..) // // The next two levels can have more performance cost: @@ -126,7 +130,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bitmap) #define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bitmap) - // Maximum number of size classes. 
(spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) #define MI_BIN_FULL (MI_BIN_HUGE+1) diff --git a/src/arena-meta.c b/src/arena-meta.c index 34be6e0e..c8c0cac6 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -25,12 +25,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_META_PAGE_SIZE MI_ARENA_SLICE_SIZE #define MI_META_PAGE_ALIGN MI_ARENA_SLICE_ALIGN -#if MI_SECURE -#define MI_META_PAGE_GUARD_SIZE (4*MI_KiB) -#else -#define MI_META_PAGE_GUARD_SIZE (0) -#endif - #define MI_META_BLOCK_SIZE (128) // large enough such that META_MAX_SIZE > 4k (even on 32-bit) #define MI_META_BLOCK_ALIGN MI_META_BLOCK_SIZE #define MI_META_BLOCKS_PER_PAGE (MI_ARENA_SLICE_SIZE / MI_META_BLOCK_SIZE) // 1024 @@ -47,7 +41,7 @@ static mi_decl_cache_align _Atomic(mi_meta_page_t*) mi_meta_pages = MI_ATOMIC_V #if MI_DEBUG > 1 static mi_meta_page_t* mi_meta_page_of_ptr(void* p, size_t* block_idx) { - mi_meta_page_t* mpage = (mi_meta_page_t*)((uint8_t*)mi_align_down_ptr(p,MI_META_PAGE_ALIGN) + MI_META_PAGE_GUARD_SIZE); + mi_meta_page_t* mpage = (mi_meta_page_t*)((uint8_t*)mi_align_down_ptr(p,MI_META_PAGE_ALIGN) + _mi_os_secure_guard_page_size()); if (block_idx != NULL) { *block_idx = ((uint8_t*)p - (uint8_t*)mpage) / MI_META_BLOCK_SIZE; } @@ -60,9 +54,9 @@ static mi_meta_page_t* mi_meta_page_next( mi_meta_page_t* mpage ) { } static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) { - mi_assert_internal(_mi_is_aligned((uint8_t*)mpage - MI_META_PAGE_GUARD_SIZE, MI_META_PAGE_ALIGN)); + mi_assert_internal(_mi_is_aligned((uint8_t*)mpage - _mi_os_secure_guard_page_size(), MI_META_PAGE_ALIGN)); mi_assert_internal(block_idx < MI_META_BLOCKS_PER_PAGE); - void* p = ((uint8_t*)mpage - MI_META_PAGE_GUARD_SIZE + (block_idx * MI_META_BLOCK_SIZE)); + void* p = ((uint8_t*)mpage - _mi_os_secure_guard_page_size() + (block_idx * MI_META_BLOCK_SIZE)); mi_assert_internal(mpage == mi_meta_page_of_ptr(p,NULL)); return p; } @@ -82,20 +76,18 @@ static mi_meta_page_t* mi_meta_page_zalloc(void) { } // guard pages - #if MI_SECURE - if (!memid.is_pinned) { - _mi_os_decommit(base, MI_META_PAGE_GUARD_SIZE); - _mi_os_decommit(base + MI_META_PAGE_SIZE - MI_META_PAGE_GUARD_SIZE, MI_META_PAGE_GUARD_SIZE); - } + #if MI_SECURE >= 1 + _mi_os_secure_guard_page_set_at(base, memid.is_pinned); + _mi_os_secure_guard_page_set_before(base + MI_META_PAGE_SIZE, memid.is_pinned); #endif - + // initialize the page and free block bitmap - mi_meta_page_t* mpage = (mi_meta_page_t*)(base + MI_META_PAGE_GUARD_SIZE); + mi_meta_page_t* mpage = (mi_meta_page_t*)(base + _mi_os_secure_guard_page_size()); mpage->memid = memid; mi_bitmap_init(&mpage->blocks_free, MI_META_BLOCKS_PER_PAGE, true /* already_zero */); const size_t mpage_size = offsetof(mi_meta_page_t,blocks_free) + mi_bitmap_size(MI_META_BLOCKS_PER_PAGE, NULL); const size_t info_blocks = _mi_divide_up(mpage_size,MI_META_BLOCK_SIZE); - const size_t guard_blocks = _mi_divide_up(MI_META_PAGE_GUARD_SIZE, MI_META_BLOCK_SIZE); + const size_t guard_blocks = _mi_divide_up(_mi_os_secure_guard_page_size(), MI_META_BLOCK_SIZE); mi_assert_internal(info_blocks + 2*guard_blocks < MI_META_BLOCKS_PER_PAGE); mi_bitmap_unsafe_setN(&mpage->blocks_free, info_blocks + guard_blocks, MI_META_BLOCKS_PER_PAGE - info_blocks - 2*guard_blocks); diff --git a/src/arena.c b/src/arena.c index 7b97fbbc..3349abb1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -576,12 +576,6 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_ return NULL; } 
-#if MI_SECURE < 2 -#define MI_ARENA_GUARD_PAGE_SIZE (0) -#else -#define MI_ARENA_GUARD_PAGE_SIZE (4*MI_KiB) -#endif - // Allocate a fresh page static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_count, size_t block_size, size_t block_alignment, mi_arena_t* req_arena, size_t tseq) @@ -621,11 +615,14 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment)); - // guard page at the end - const size_t page_noguard_size = mi_size_of_slices(slice_count) - MI_ARENA_GUARD_PAGE_SIZE; - #if MI_SECURE >= 2 - if (memid.initially_committed && !memid.is_pinned) { - _mi_os_decommit((uint8_t*)page + page_noguard_size, MI_ARENA_GUARD_PAGE_SIZE); + // guard page at the end of mimalloc page? + #if MI_SECURE < 2 + const size_t page_noguard_size = mi_size_of_slices(slice_count); + #else + mi_assert(mi_size_of_slices(slice_count) > _mi_os_secure_guard_page_size()); + const size_t page_noguard_size = mi_size_of_slices(slice_count) - _mi_os_secure_guard_page_size(); + if (memid.initially_committed) { + _mi_os_secure_guard_page_set_at((uint8_t*)page + page_noguard_size, memid.is_pinned); } #endif @@ -795,7 +792,7 @@ void _mi_arenas_page_free(mi_page_t* page) { // we must do this since we may later allocate large spans over this page and cannot have a guard page in between #if MI_SECURE >= 2 if (!page->memid.is_pinned) { - _mi_os_commit((uint8_t*)page + mi_memid_size(page->memid) - MI_ARENA_GUARD_PAGE_SIZE, MI_ARENA_GUARD_PAGE_SIZE, NULL); + _mi_os_secure_guard_page_reset_before((uint8_t*)page + mi_memid_size(page->memid)); } #endif @@ -1089,7 +1086,7 @@ static size_t mi_arena_info_slices_needed(size_t slice_count, size_t* bitmap_bas const size_t size = base_size + bitmaps_size; const size_t os_page_size = _mi_os_page_size(); - const size_t info_size = _mi_align_up(size, os_page_size) + MI_ARENA_GUARD_PAGE_SIZE; + const size_t info_size = _mi_align_up(size, os_page_size) + _mi_os_secure_guard_page_size(); const size_t info_slices = mi_slice_count_of_size(info_size); if (bitmap_base != NULL) *bitmap_base = base_size; @@ -1105,7 +1102,6 @@ static mi_bitmap_t* mi_arena_bitmap_init(size_t slice_count, uint8_t** base) { static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { - mi_assert(!is_large || (memid.initially_committed && memid.is_pinned)); mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); mi_assert(start!=NULL); if (start==NULL) return false; @@ -1134,17 +1130,15 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s // commit & zero if needed if (!memid.initially_committed) { - // if MI_SECURE, leave a guard OS page decommitted at the end - _mi_os_commit(arena, mi_size_of_slices(info_slices) - MI_ARENA_GUARD_PAGE_SIZE, NULL); + // leave a guard OS page decommitted at the end + _mi_os_commit(arena, mi_size_of_slices(info_slices) - _mi_os_secure_guard_page_size(), NULL); } - else if (!memid.is_pinned) { - #if MI_SECURE > 0 - // if MI_SECURE, decommit a guard OS page at the end of the arena info - _mi_os_decommit((uint8_t*)arena + mi_size_of_slices(info_slices) - MI_ARENA_GUARD_PAGE_SIZE, MI_ARENA_GUARD_PAGE_SIZE); - #endif + else { + // if MI_SECURE, set a guard page at the end + _mi_os_secure_guard_page_set_before((uint8_t*)arena + 
mi_size_of_slices(info_slices), memid.is_pinned);
   }
   if (!memid.initially_zero) {
-    _mi_memzero(arena, mi_size_of_slices(info_slices) - MI_ARENA_GUARD_PAGE_SIZE);
+    _mi_memzero(arena, mi_size_of_slices(info_slices) - _mi_os_secure_guard_page_size());
   }
 
   // init
diff --git a/src/os.c b/src/os.c
index 80d44d12..399aac6c 100644
--- a/src/os.c
+++ b/src/os.c
@@ -61,8 +61,16 @@ size_t _mi_os_large_page_size(void) {
   return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
 }
 
+size_t _mi_os_guard_page_size(void) {
+  const size_t gsize = _mi_os_page_size();
+  mi_assert(gsize <= (MI_ARENA_SLICE_SIZE/8));
+  return gsize;
+}
+
 size_t _mi_os_virtual_address_bits(void) {
-  return mi_os_mem_config.virtual_address_bits;
+  const size_t vbits = mi_os_mem_config.virtual_address_bits;
+  mi_assert(vbits <= MI_MAX_VABITS);
+  return vbits;
 }
 
 bool _mi_os_use_large_page(size_t size, size_t alignment) {
@@ -99,6 +107,50 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
   return NULL;
 }
 
+// In secure mode, return the size of a guard page, otherwise 0
+size_t _mi_os_secure_guard_page_size(void) {
+  #if MI_SECURE > 0
+  return _mi_os_guard_page_size();
+  #else
+  return 0;
+  #endif
+}
+
+// In secure mode, try to decommit an area and output a warning if this fails.
+bool _mi_os_secure_guard_page_set_at(void* addr, bool is_pinned) {
+  if (addr == NULL) return true;
+  #if MI_SECURE > 0
+  const bool ok = (is_pinned ? false : _mi_os_decommit(addr, _mi_os_secure_guard_page_size()));
+  if (!ok) {
+    _mi_error_message(EINVAL, "secure level %d, but failed to set a guard page (at %p of size %zu)\n", MI_SECURE, addr, _mi_os_secure_guard_page_size());
+  }
+  return ok;
+  #else
+  MI_UNUSED(is_pinned);
+  return true;
+  #endif
+}
+
+// In secure mode, set a guard page just before the given address (and output a warning if this fails).
+bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned) { + return _mi_os_secure_guard_page_set_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), is_pinned); +} + +// In secure mode, try to recommit an area +bool _mi_os_secure_guard_page_reset_at(void* addr) { + if (addr == NULL) return true; + #if MI_SECURE > 0 + return _mi_os_commit(addr, _mi_os_secure_guard_page_size(), NULL); + #else + return true; + #endif +} + +// In secure mode, try to recommit an area +bool _mi_os_secure_guard_page_reset_before(void* addr) { + return _mi_os_secure_guard_page_reset_at((uint8_t*)addr - _mi_os_secure_guard_page_size()); +} + /* ----------------------------------------------------------- Free memory From c65c6d83bd0a1c3d00bcbe8ce4fc1bc10ddc947e Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 23 Dec 2024 16:31:42 -0800 Subject: [PATCH 4/5] fix guard page size --- ide/vs2022/mimalloc.vcxproj | 2 +- src/arena.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 87e866bb..63bc7d1d 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -190,7 +190,7 @@ true Default ../../include - MI_DEBUG=3;MI_GUARDED=0;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_GUARDED=0;MI_SECURE=4;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/src/arena.c b/src/arena.c index 3349abb1..9ae44d85 100644 --- a/src/arena.c +++ b/src/arena.c @@ -720,10 +720,10 @@ static mi_page_t* mi_arenas_page_singleton_alloc(mi_heap_t* heap, size_t block_s mi_tld_t* const tld = heap->tld; const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); const size_t info_size = (os_align ? MI_PAGE_ALIGN : mi_page_info_size()); - #if MI_ARENA_GUARD_PAGE_SIZE == 0 + #if MI_SECURE < 2 const size_t slice_count = mi_slice_count_of_size(info_size + block_size); #else - const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, MI_ARENA_GUARD_PAGE_SIZE) + MI_ARENA_GUARD_PAGE_SIZE); + const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, _mi_os_secure_guard_page_size()) + _mi_os_secure_guard_page_size()); #endif mi_page_t* page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq); From 9bad269c518a4104ac13584bc9474e0e357efd1c Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 23 Dec 2024 16:47:01 -0800 Subject: [PATCH 5/5] fix purge delay check for arenas --- src/arena.c | 2 +- src/options.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9ae44d85..af0d1d0a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1551,7 +1551,7 @@ static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld) mi_subproc_t* subproc = tld->subproc; const mi_msecs_t now = _mi_clock_now(); mi_msecs_t arenas_expire = mi_atomic_load_acquire(&subproc->purge_expire); - if (!force && (arenas_expire == 0 || arenas_expire < now)) return; + if (!force && (arenas_expire == 0 || arenas_expire > now)) return; const size_t max_arena = mi_arenas_get_count(subproc); if (max_arena == 0) return; diff --git a/src/options.c b/src/options.c index 7562cd46..63d8a68f 100644 --- a/src/options.c +++ b/src/options.c @@ -144,7 +144,7 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 1000,UNINIT, 
MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds + { 500, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
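Three short example sketches follow; they are illustrative additions, not part of the patch series. First, the public API change of patch 2: externally reserved memory is now handed to mimalloc with an is_pinned flag instead of is_large. A usage sketch, assuming Linux mmap and a hypothetical 4 MiB ALIGN chosen to satisfy the MI_ARENA_SLICE_SIZE alignment that mi_manage_os_memory_ex2 asserts:

#include <mimalloc.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/mman.h>

#define ALIGN ((size_t)(4*1024*1024))   // assumed to satisfy mimalloc's slice alignment

int main(void) {
  const size_t size = (size_t)64*1024*1024;
  // over-allocate so a suitably aligned region can be handed to mimalloc
  void* raw = mmap(NULL, size + ALIGN, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (raw == MAP_FAILED) return 1;
  void* base = (void*)(((uintptr_t)raw + ALIGN - 1) & ~(uintptr_t)(ALIGN - 1));
  // committed, not pinned (mimalloc may decommit/reset parts of it),
  // anonymous mmap memory is zero-initialized, no NUMA preference (-1)
  return mi_manage_os_memory(base, size, true, false, true, -1) ? 0 : 1;
}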
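Second, the semantics of the guard-page helpers added in patch 3: a guard is created by decommitting one OS page (so any later access faults), no guard can be placed in pinned memory (large or huge OS pages cannot be decommitted), and a reset recommits the page so the range can later be reused for large spans. A reduced model; os_decommit, os_commit, and GUARD_SIZE are hypothetical stand-ins for the real OS primitives:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define GUARD_SIZE 4096   // assume one 4 KiB OS page, as _mi_os_guard_page_size() returns

// hypothetical stand-ins for _mi_os_decommit / _mi_os_commit
static bool os_decommit(void* addr, size_t size) { (void)addr; (void)size; return true; }
static bool os_commit  (void* addr, size_t size) { (void)addr; (void)size; return true; }

// like _mi_os_secure_guard_page_set_at: pinned memory cannot be decommitted,
// so no guard page can be placed in it
static bool guard_set_at(void* addr, bool is_pinned) {
  if (addr == NULL) return true;
  if (is_pinned) return false;            // cannot punch a hole in pinned memory
  return os_decommit(addr, GUARD_SIZE);   // the inaccessible page is the guard
}

// like _mi_os_secure_guard_page_set_before: the guard occupies the bytes just below addr
static bool guard_set_before(void* addr, bool is_pinned) {
  return guard_set_at((uint8_t*)addr - GUARD_SIZE, is_pinned);
}

// like _mi_os_secure_guard_page_reset_at: recommit so the page is usable again
static bool guard_reset_at(void* addr) {
  if (addr == NULL) return true;
  return os_commit(addr, GUARD_SIZE);
}

int main(void) {
  static uint8_t area[2*GUARD_SIZE];      // stand-in region; real code uses OS-allocated memory
  if (!guard_set_before(area + GUARD_SIZE, false)) return 1;
  return guard_reset_at(area) ? 0 : 1;
}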
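Third, the comparison fixed in patch 5: with the old arenas_expire < now, the early return fired exactly when the purge delay had already elapsed, so a non-forced purge could never run; the corrected arenas_expire > now returns early only while the expiration still lies in the future. The intended predicate, reduced to its core (names are illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef int64_t msecs_t;

// should a purge run now? (reduced model of the check in mi_arenas_try_purge)
static bool purge_due(msecs_t expire, msecs_t now, bool force) {
  if (force) return true;
  if (expire == 0) return false;   // no purge scheduled
  return now >= expire;            // due once the delay has elapsed
}

int main(void) {
  assert(!purge_due(1000, 999, false));   // still within the purge delay: skip
  assert( purge_due(1000, 1000, false));  // delay elapsed: purge
  assert( purge_due(0, 0, true));         // force purges regardless
  return 0;
}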