diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4729e5b5..5fc1808e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,7 @@ option(MI_BUILD_OBJECT "Build object library" ON)
 option(MI_BUILD_TESTS "Build test executables" ON)
 option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
 option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
-option(MI_DEBUG_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
+option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
 option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)
 option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF)
 option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF)
@@ -207,9 +207,9 @@ if(MI_TRACK_ETW)
   endif()
 endif()

-if(MI_DEBUG_GUARDED)
-  message(STATUS "Compile guard pages behind certain object allocations (MI_DEBUG_GUARDED=ON)")
-  list(APPEND mi_defines MI_DEBUG_GUARDED=1)
+if(MI_GUARDED)
+  message(STATUS "Compile guard pages behind certain object allocations (MI_GUARDED=ON)")
+  list(APPEND mi_defines MI_GUARDED=1)
   if(NOT MI_NO_PADDING)
     message(STATUS "  Disabling padding due to guard pages (MI_NO_PADDING=ON)")
     set(MI_NO_PADDING ON)
@@ -320,13 +320,13 @@ if(MI_WIN_USE_FLS)
 endif()

-  # Check /proc/cpuinfo for an SV39 MMU and define a constant if one is
-  # found. We will want to skip the aligned hinting in that case. Issue #939, #949
+  # Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits.
+  # (this will skip the aligned hinting in that case. Issue #939, #949)
   if (EXISTS /proc/cpuinfo)
     file(STRINGS /proc/cpuinfo mi_sv39_mmu REGEX "^mmu[ \t]+:[ \t]+sv39$")
     if (mi_sv39_mmu)
-      MESSAGE( STATUS "Disable aligned hints (SV39 MMU detected)" )
-      list(APPEND mi_defines MI_NO_ALIGNED_HINT=1)
+      MESSAGE( STATUS "Set virtual address bits to 39 (SV39 MMU detected)" )
+      list(APPEND mi_defines MI_DEFAULT_VIRTUAL_ADDRESS_BITS=39)
     endif()
   endif()
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 6f1cd256..a481ac48 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -15,7 +15,7 @@ trigger:

 jobs:
 - job:
-  displayName: Windows
+  displayName: Windows 2022
   pool:
     vmImage:
       windows-2022
@@ -52,7 +52,7 @@ jobs:
 #    artifact: mimalloc-windows-$(BuildType)

 - job:
-  displayName: Linux
+  displayName: Ubuntu 22.04
   pool:
     vmImage:
       ubuntu-22.04
@@ -117,8 +117,8 @@ jobs:
       CC: clang
       CXX: clang
       BuildType: debug-guarded-clang
-      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMI_DEBUG_FULL=ON -DMI_DEBUG_GUARDED=ON
-
+      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMI_DEBUG_FULL=ON -DMI_GUARDED=ON
+
   steps:
   - task: CMake@1
     inputs:
@@ -129,16 +129,16 @@ jobs:
   - script: ctest --verbose --timeout 180
     workingDirectory: $(BuildType)
     displayName: CTest
-    env:
-      MIMALLOC_DEBUG_GUARDED_MAX: 1024
+    env:
+      MIMALLOC_GUARDED_SAMPLE_RATE: 1000
 # - upload: $(Build.SourcesDirectory)/$(BuildType)
 #   artifact: mimalloc-ubuntu-$(BuildType)

 - job:
-  displayName: macOS
+  displayName: macOS 14 (Sonoma)
   pool:
     vmImage:
-      macOS-latest
+      macOS-14
   strategy:
     matrix:
       Debug:
@@ -164,35 +164,145 @@ jobs:
 # - upload: $(Build.SourcesDirectory)/$(BuildType)
 #   artifact: mimalloc-macos-$(BuildType)

-# - job:
-#   displayName: Windows-2017
-#   pool:
-#     vmImage:
-#       vs2017-win2016
-#   strategy:
-#     matrix:
-#       Debug:
-#         BuildType: debug
-#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
-#         MSBuildConfiguration: Debug
-#       Release:
-#         BuildType: release
-#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Release
-#         MSBuildConfiguration: Release
-#       Secure:
-#         BuildType: secure
-#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
-#         MSBuildConfiguration: Release
-#   steps:
-#   - task: CMake@1
-#     inputs:
-#       workingDirectory: $(BuildType)
-#       cmakeArgs: .. $(cmakeExtraArgs)
-#   - task: MSBuild@1
-#     inputs:
-#       solution: $(BuildType)/libmimalloc.sln
-#       configuration: '$(MSBuildConfiguration)'
-#   - script: |
-#       cd $(BuildType)
-#       ctest --verbose --timeout 180
-#     displayName: CTest
+# ----------------------------------------------------------
+# Other OS versions (just debug mode)
+# ----------------------------------------------------------
+
+- job:
+  displayName: Windows 2019
+  pool:
+    vmImage:
+      windows-2019
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+        MSBuildConfiguration: Debug
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+        MSBuildConfiguration: Release
+  steps:
+  - task: CMake@1
+    inputs:
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - task: MSBuild@1
+    inputs:
+      solution: $(BuildType)/libmimalloc.sln
+      configuration: '$(MSBuildConfiguration)'
+      msbuildArguments: -m
+  - script: ctest --verbose --timeout 180 -C $(MSBuildConfiguration)
+    workingDirectory: $(BuildType)
+    displayName: CTest
+
+- job:
+  displayName: Ubuntu 24.04
+  pool:
+    vmImage:
+      ubuntu-24.04
+  strategy:
+    matrix:
+      Debug:
+        CC: gcc
+        CXX: g++
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Debug++:
+        CC: gcc
+        CXX: g++
+        BuildType: debug-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
+      Debug Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Debug++ Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
+      Release Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: release-clang
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+  steps:
+  - task: CMake@1
+    inputs:
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - script: make -j$(nproc) -C $(BuildType)
+    displayName: Make
+  - script: ctest --verbose --timeout 180
+    workingDirectory: $(BuildType)
+    displayName: CTest
+
+- job:
+  displayName: Ubuntu 20.04
+  pool:
+    vmImage:
+      ubuntu-20.04
+  strategy:
+    matrix:
+      Debug:
+        CC: gcc
+        CXX: g++
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Debug++:
+        CC: gcc
+        CXX: g++
+        BuildType: debug-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
+      Debug Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Debug++ Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
+      Release Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: release-clang
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+  steps:
+  - task: CMake@1
+    inputs:
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - script: make -j$(nproc) -C $(BuildType)
+    displayName: Make
+  - script: ctest --verbose --timeout 180
+    workingDirectory: $(BuildType)
+    displayName: CTest
+
+- job:
+  displayName: macOS 15 (Sequoia)
+  pool:
+    vmImage:
+      macOS-15
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+  steps:
+  - task: CMake@1
+    inputs:
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
+    displayName: Make
+  - script: ctest --verbose --timeout 180
+    workingDirectory: $(BuildType)
+    displayName: CTest
diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll
index ed001d64..4702fec0 100644
Binary files a/bin/mimalloc-redirect.dll and b/bin/mimalloc-redirect.dll differ
diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll
index ec4ff1d5..17c05550 100644
Binary files a/bin/mimalloc-redirect32.dll and b/bin/mimalloc-redirect32.dll differ
diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj
index 9e0b31f2..160f1436 100644
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc.vcxproj
@@ -116,7 +116,7 @@
       true
       Default
       ../../include
-      MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions);
+      MI_DEBUG=4;MI_GUARDED=1;%(PreprocessorDefinitions);
       CompileAsCpp
       false
       stdcpp20
diff --git a/include/mimalloc.h b/include/mimalloc.h
index df85a2c0..534f1cbf 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -291,7 +291,7 @@ mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t a
 #endif

-// Experimental: allow sub-processes whose memory segments stay separated (and no reclamation between them) 
+// Experimental: allow sub-processes whose memory segments stay separated (and no reclamation between them)
 // Used for example for separate interpreters in one process.
 typedef void* mi_subproc_id_t;
 mi_decl_export mi_subproc_id_t mi_subproc_main(void);
@@ -310,6 +310,12 @@ mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_ex(int heap_tag, bool al
 // deprecated
 mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;

+// Experimental: objects followed by a guard page.
+// A sample rate of 0 disables guarded objects, while 1 uses a guard page for every object.
+// A seed of 0 uses a random start point. Only objects within the size bound are eligible for guard pages.
+mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed);
+mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max);
+

 // ------------------------------------------------------
 // Convenience
@@ -350,7 +356,7 @@ typedef enum mi_option_e {
   mi_option_deprecated_segment_cache,
   mi_option_deprecated_page_reset,
   mi_option_abandoned_page_purge,       // immediately purge delayed purges on thread termination
-  mi_option_deprecated_segment_reset, 
+  mi_option_deprecated_segment_reset,
   mi_option_eager_commit_delay,         // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
   mi_option_purge_delay,                // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10)
   mi_option_use_numa_nodes,             // 0 = use all available numa nodes, otherwise use at most N nodes.
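A minimal usage sketch of the experimental guarded API declared in the mimalloc.h hunk above (assuming a build with MI_GUARDED=1; the sample rate of 1000, seed of 0, and the 16..1024 byte bound are illustrative values, not defaults from this patch):

  mi_heap_t* heap = mi_heap_new();
  mi_heap_guarded_set_sample_rate(heap, 1000, 0);  // guard ~1 in 1000 eligible allocations; seed 0 picks a random start point
  mi_heap_guarded_set_size_bound(heap, 16, 1024);  // only objects of 16..1024 bytes are eligible for a guard page
  void* p = mi_heap_malloc(heap, 128);             // a sampled block is placed directly in front of an inaccessible guard page
  mi_free(p);                                      // freeing a guarded block unprotects its guard page again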
@@ -367,8 +373,11 @@ typedef enum mi_option_e {
   mi_option_disallow_arena_alloc,       // 1 = do not use arena's for allocation (except if using specific arena id's)
   mi_option_retry_on_oom,               // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows)
   mi_option_visit_abandoned,            // allow visiting heap blocks from abandoned threads (=0)
-  mi_option_debug_guarded_min,          // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects (=0)
-  mi_option_debug_guarded_max,          // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects (=0)
+  mi_option_guarded_min,                // only used when building with MI_GUARDED: minimal rounded object size for guarded objects (=0)
+  mi_option_guarded_max,                // only used when building with MI_GUARDED: maximal rounded object size for guarded objects (=1GiB)
+  mi_option_guarded_precise,            // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0)
+  mi_option_guarded_sample_rate,        // 1 out of N allocations in the min/max range will be guarded (=4000)
+  mi_option_guarded_sample_seed,        // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0)
   mi_option_target_segments_per_thread, // experimental (=0)
   _mi_option_last,
   // legacy option names
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index dcbaf15d..91897b9d 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -91,6 +91,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
 mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
 mi_heap_t* _mi_heap_main_get(void);   // statically allocated main backing heap
 mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);
+void _mi_heap_guarded_init(mi_heap_t* heap);

 // os.c
 void _mi_os_init(void);   // called from process init
@@ -641,16 +642,40 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
   page->flags.x.has_aligned = has_aligned;
 }

-#if MI_DEBUG_GUARDED
-static inline bool mi_page_has_guarded(const mi_page_t* page) {
-  return page->flags.x.has_guarded;
+/* -------------------------------------------------------------------
+  Guarded objects
+------------------------------------------------------------------- */
+#if MI_GUARDED
+static inline bool mi_block_ptr_is_guarded(const mi_block_t* block, const void* p) {
+  const ptrdiff_t offset = (uint8_t*)p - (uint8_t*)block;
+  return (offset >= (ptrdiff_t)(sizeof(mi_block_t)) && block->next == MI_BLOCK_TAG_GUARDED);
 }

-static inline void mi_page_set_has_guarded(mi_page_t* page, bool has_guarded) {
-  page->flags.x.has_guarded = has_guarded;
+static inline bool mi_heap_malloc_use_guarded(mi_heap_t* heap, size_t size) {
+  // this code is written to result in fast assembly as it is on the hot path for allocation
+  const size_t count = heap->guarded_sample_count - 1;  // if the rate was 0, this will underflow and count for a long time..
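+  // the sample counter simply counts down on every allocation; a block is only sampled when it reaches zero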
+  if mi_likely(count != 0) {
+    // no sample
+    heap->guarded_sample_count = count;
+    return false;
+  }
+  else if (size >= heap->guarded_size_min && size <= heap->guarded_size_max) {
+    // use guarded allocation
+    heap->guarded_sample_count = heap->guarded_sample_rate;  // reset
+    return (heap->guarded_sample_rate != 0);
+  }
+  else {
+    // failed size criteria, rewind count (but don't write to an empty heap)
+    if (heap->guarded_sample_rate != 0) { heap->guarded_sample_count = 1; }
+    return false;
+  }
+}
+
+mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
+
 #endif

+
 /* -------------------------------------------------------------------
   Encoding/Decoding the free list next pointers
diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index 640c966f..f8bf948e 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -25,6 +25,8 @@ typedef struct mi_os_mem_config_s {
   size_t page_size;            // default to 4KiB
   size_t large_page_size;      // 0 if not supported, usually 2MiB (4MiB on Windows)
   size_t alloc_granularity;    // smallest allocation size (usually 4KiB, on Windows 64KiB)
+  size_t physical_memory;      // physical memory size
+  size_t virtual_address_bits; // usually 48 or 56 bits on 64-bit systems. (used to determine secure randomization)
   bool   has_overcommit;       // can we reserve more memory than can be actually committed?
   bool   has_partial_free;     // can allocated blocks be freed partially? (true for mmap, false for VirtualAlloc)
   bool   has_virtual_reserve;  // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory)
@@ -41,9 +43,10 @@ int _mi_prim_free(void* addr, size_t size );
 // If `commit` is false, the virtual memory range only needs to be reserved (with no access)
 // which will later be committed explicitly using `_mi_prim_commit`.
 // `is_zero` is set to true if the memory was zero initialized (as on most OS's)
+// The `hint_addr` address is either `NULL` or a preferred allocation address but can be ignored.
 // pre: !commit => !allow_large
 //      try_alignment >= _mi_os_page_size() and a power of 2
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr);
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr);

 // Commit memory. Returns error code or 0 on success.
 // For example, on Linux this would make the memory PROT_READ|PROT_WRITE.
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 044d6eae..aa5f9996 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -75,8 +75,8 @@ terms of the MIT license. A copy of the license can be found in the file

 // Use guard pages behind objects of a certain size (set by the MIMALLOC_GUARDED_MIN/MAX options)
 // Padding should be disabled when using guard pages
-// #define MI_DEBUG_GUARDED 1
-#if defined(MI_DEBUG_GUARDED)
+// #define MI_GUARDED 1
+#if defined(MI_GUARDED)
 #define MI_PADDING 0
 #endif

@@ -244,6 +244,13 @@ typedef struct mi_block_s {
   mi_encoded_t next;
 } mi_block_t;

+#if MI_GUARDED
+// we always align guarded pointers in a block at an offset
+// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
+#define MI_BLOCK_TAG_ALIGNED   ((mi_encoded_t)(0))
+#define MI_BLOCK_TAG_GUARDED   (~MI_BLOCK_TAG_ALIGNED)
+#endif
+

 // The delayed flags are used for efficient multi-threaded free-ing
 typedef enum mi_delayed_e {
@@ -262,7 +269,6 @@ typedef union mi_page_flags_s {
   struct {
     uint8_t in_full : 1;
     uint8_t has_aligned : 1;
-    uint8_t has_guarded : 1;  // only used with MI_DEBUG_GUARDED
   } x;
 } mi_page_flags_t;
 #else
@@ -272,7 +278,6 @@ typedef union mi_page_flags_s {
   struct {
     uint8_t in_full;
     uint8_t has_aligned;
-    uint8_t has_guarded;  // only used with MI_DEBUG_GUARDED
   } x;
 } mi_page_flags_t;
 #endif
@@ -556,6 +561,13 @@ struct mi_heap_s {
   mi_heap_t* next;             // list of heaps per thread
   bool no_reclaim;             // `true` if this heap should not reclaim abandoned pages
   uint8_t tag;                 // custom tag, can be used for separating heaps based on the object types
+  #if MI_GUARDED
+  size_t guarded_size_min;     // minimal size for guarded objects
+  size_t guarded_size_max;     // maximal size for guarded objects
+  size_t guarded_sample_rate;  // sample rate (set to 0 to disable guarded pages)
+  size_t guarded_sample_seed;  // starting sample count
+  size_t guarded_sample_count; // current sample count (counting down to 0)
+  #endif
   mi_page_t* pages_free_direct[MI_PAGES_DIRECT];  // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
   mi_page_queue_t pages[MI_BIN_FULL + 1];         // queue of pages for each size class (or "bin")
 };
@@ -649,6 +661,7 @@ typedef struct mi_stats_s {
   mi_stat_counter_t arena_count;
   mi_stat_counter_t arena_crossover_count;
   mi_stat_counter_t arena_rollback_count;
+  mi_stat_counter_t guarded_alloc_count;
 #if MI_STAT>1
   mi_stat_count_t normal_bins[MI_BIN_HUGE+1];
 #endif
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 3d987bdd..9b5a6bd1 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -20,14 +20,36 @@ static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) {
   mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
   if (alignment > size) return false;
   if (alignment <= MI_MAX_ALIGN_SIZE) return true;
-  #if MI_DEBUG_GUARDED
-  return false;
-  #else
   const size_t bsize = mi_good_size(size);
   return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0);
-  #endif
 }

+#if MI_GUARDED
+static mi_decl_restrict void* mi_heap_malloc_guarded_aligned(mi_heap_t* heap, size_t size, size_t alignment, bool zero) mi_attr_noexcept {
+  // use over allocation for guarded blocks
+  mi_assert_internal(alignment > 0 && alignment < MI_BLOCK_ALIGNMENT_MAX);
+  const size_t oversize = size + alignment - 1;
+  void* base = _mi_heap_malloc_guarded(heap, oversize, zero);
+  void* p = mi_align_up_ptr(base, alignment);
+  mi_track_align(base, p, (uint8_t*)p - (uint8_t*)base, size);
+  mi_assert_internal(mi_usable_size(p) >= size);
+  mi_assert_internal(_mi_is_aligned(p, alignment));
+  return p;
+}
+
+static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero) {
+  const size_t rate = heap->guarded_sample_rate;
+  heap->guarded_sample_rate = 0;
+  void* p = _mi_heap_malloc_zero(heap, size, zero);
+  heap->guarded_sample_rate = rate;
+  return p;
+}
+#else
+static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero) {
+  return _mi_heap_malloc_zero(heap, size, zero);
+}
+#endif
+
 // Fallback aligned allocation that over-allocates -- split out for better codegen
 static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
 {
@@ -48,6 +70,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
       return NULL;
     }
     oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
+    // note: no guarded as alignment > 0
    p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment);  // the page block size should be large enough to align in the single huge page block
     // zero afterwards as only the area from the aligned_p may be committed!
     if (p == NULL) return NULL;
@@ -55,11 +78,11 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   else {
     // otherwise over-allocate
     oversize = size + alignment - 1;
-    p = _mi_heap_malloc_zero(heap, oversize, zero);
+    p = mi_heap_malloc_zero_no_guarded(heap, oversize, zero);
     if (p == NULL) return NULL;
   }
   mi_page_t* page = _mi_ptr_page(p);
- 
+
   // .. and align within the allocation
   const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
   const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
@@ -68,6 +91,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   void* aligned_p = (void*)((uintptr_t)p + adjust);
   if (aligned_p != p) {
     mi_page_set_has_aligned(page, true);
+    #if MI_GUARDED
+    // set tag to aligned so mi_usable_size works with guard pages
+    if (adjust >= sizeof(mi_block_t)) {
+      mi_block_t* const block = (mi_block_t*)p;
+      block->next = MI_BLOCK_TAG_ALIGNED;
+    }
+    #endif
     _mi_padding_shrink(page, (mi_block_t*)p, adjust + size);
   }
   // todo: expand padding if overallocated ?
@@ -76,8 +106,10 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
   mi_assert_internal(mi_usable_size(aligned_p)>=size);
   mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust);
-  #if !MI_DEBUG_GUARDED
-  mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_page(aligned_p), aligned_p));
+  #if MI_DEBUG > 1
+  mi_page_t* const apage = _mi_ptr_page(aligned_p);
+  void* unalign_p = _mi_page_ptr_unalign(apage, aligned_p);
+  mi_assert_internal(p == unalign_p);
   #endif

   // now zero the block if needed
@@ -91,6 +123,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   if (p != aligned_p) {
     mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
+    #if MI_GUARDED
+    mi_track_mem_defined(p, sizeof(mi_block_t));
+    #endif
   }
   return aligned_p;
 }
@@ -100,27 +135,27 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t*
 {
   mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
   // we don't allocate more than MI_MAX_ALLOC_SIZE (see )
-  if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { 
+  if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) {
     #if MI_DEBUG > 0
     _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
     #endif
     return NULL;
   }
- 
+
   // use regular allocation if it is guaranteed to fit the alignment constraints.
   // this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exist
   // a page with the right block size, and if we always use the over-alloc fallback that would never happen.
   if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) {
-    void* p = _mi_heap_malloc_zero(heap, size, zero);
+    void* p = mi_heap_malloc_zero_no_guarded(heap, size, zero);
     mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
-    const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; 
+    const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0;
     if mi_likely(is_aligned_or_null) {
       return p;
     }
     else {
       // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct..
       mi_assert(false);
-      mi_free(p); 
+      mi_free(p);
     }
   }
@@ -128,6 +163,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t*
   return mi_heap_malloc_zero_aligned_at_overalloc(heap,size,alignment,offset,zero);
 }

+
 // Primitive aligned allocation
 static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
 {
@@ -138,12 +174,17 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
     #endif
     return NULL;
   }
- 
-  #if !MI_DEBUG_GUARDED
+
+  #if MI_GUARDED
+  if (offset==0 && alignment < MI_BLOCK_ALIGNMENT_MAX && mi_heap_malloc_use_guarded(heap,size)) {
+    return mi_heap_malloc_guarded_aligned(heap, size, alignment, zero);
+  }
+  #endif
+
   // try first if there happens to be a small block available with just the right alignment
   if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
     const uintptr_t align_mask = alignment-1;  // for any x, `(x & align_mask) == (x % alignment)`
-    const size_t padsize = size + MI_PADDING_SIZE; 
+    const size_t padsize = size + MI_PADDING_SIZE;
     mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
     if mi_likely(page->free != NULL) {
       const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0;
@@ -160,7 +201,6 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
       }
     }
   }
-  #endif

   // fallback to generic aligned allocation
   return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero);
@@ -318,3 +358,5 @@ mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t
 mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
   return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment);
 }
+
+
diff --git a/src/alloc.c b/src/alloc.c
index 70767e5b..ffa7b8b7 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -31,22 +31,22 @@ terms of the MIT license. A copy of the license can be found in the file
 extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
 {
   mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
- 
+
   // check the free list
   mi_block_t* const block = page->free;
   if mi_unlikely(block == NULL) {
     return _mi_malloc_generic(heap, size, zero, 0);
   }
   mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
- 
+
   // pop from the free list
   page->free = mi_block_next(page, block);
   page->used++;
   mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
   mi_assert_internal(page->block_size < MI_MAX_ALIGN_SIZE || _mi_is_aligned(block, MI_MAX_ALIGN_SIZE));
- 
+
   #if MI_DEBUG>3
-  if (page->free_is_zero && size > sizeof(*block)) { 
+  if (page->free_is_zero && size > sizeof(*block)) {
     mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block)));
   }
   #endif
@@ -121,10 +121,8 @@ extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t siz
   return _mi_page_malloc_zero(heap,page,size,true);
 }

-#if MI_DEBUG_GUARDED
-static mi_decl_restrict void* mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
-static inline bool mi_heap_malloc_use_guarded(size_t size, bool has_huge_alignment);
-static inline bool mi_heap_malloc_small_use_guarded(size_t size);
+#if MI_GUARDED
+mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
 #endif

 static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
@@ -134,11 +132,13 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
   const uintptr_t tid = _mi_thread_id();
   mi_assert(heap->thread_id == 0 || heap->thread_id == tid);  // heaps are thread local
   #endif
-  #if (MI_PADDING || MI_DEBUG_GUARDED)
+  #if (MI_PADDING || MI_GUARDED)
   if (size == 0) { size = sizeof(void*); }
   #endif
-  #if MI_DEBUG_GUARDED
-  if (mi_heap_malloc_small_use_guarded(size)) { return mi_heap_malloc_guarded(heap, size, zero); }
+  #if MI_GUARDED
+  if (mi_heap_malloc_use_guarded(heap,size)) {
+    return _mi_heap_malloc_guarded(heap, size, zero);
+  }
   #endif

   // get page in constant time, and allocate from it
@@ -171,13 +171,15 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si

 // The main allocation function
 extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept {
-  // fast path for small objects 
+  // fast path for small objects
   if mi_likely(size <= MI_SMALL_SIZE_MAX) {
     mi_assert_internal(huge_alignment == 0);
     return mi_heap_malloc_small_zero(heap, size, zero);
   }
-  #if MI_DEBUG_GUARDED
-  else if (mi_heap_malloc_use_guarded(size,huge_alignment>0)) { return mi_heap_malloc_guarded(heap, size, zero); }
+  #if MI_GUARDED
+  else if (huge_alignment==0 && mi_heap_malloc_use_guarded(heap,size)) {
+    return _mi_heap_malloc_guarded(heap, size, zero);
+  }
   #endif
   else {
     // regular allocation
@@ -185,7 +187,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z
     mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id());  // heaps are thread local
     void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment);  // note: size can overflow but it is detected in malloc_generic
     mi_track_malloc(p,size,zero);
- 
+
     #if MI_STAT>1
     if (p != NULL) {
       if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
@@ -601,69 +603,73 @@ mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) {
   }
 }

-#if MI_DEBUG_GUARDED
-static inline bool mi_heap_malloc_small_use_guarded(size_t size) {
-  return (size <= (size_t)_mi_option_get_fast(mi_option_debug_guarded_max)
-          && size >= (size_t)_mi_option_get_fast(mi_option_debug_guarded_min));
+#if MI_GUARDED
+// We always allocate a guarded block at an offset (`mi_page_has_aligned` will be true).
+// We then set the first word of the block to `0` for regular offset aligned allocations (in `alloc-aligned.c`)
+// and the first word to `~0` for guarded allocations to have a correct `mi_usable_size`
+
+static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
+  // TODO: we can still make padding work by moving it out of the guard page area
+  mi_page_t* const page = _mi_ptr_page(block);
+  mi_page_set_has_aligned(page, true);
+  block->next = MI_BLOCK_TAG_GUARDED;
+
+  // set guard page at the end of the block
+  mi_segment_t* const segment = _mi_page_segment(page);
+  const size_t block_size = mi_page_block_size(page);  // must use `block_size` to match `mi_free_local`
+  const size_t os_page_size = _mi_os_page_size();
+  mi_assert_internal(block_size >= obj_size + os_page_size + sizeof(mi_block_t));
+  if (block_size < obj_size + os_page_size + sizeof(mi_block_t)) {
+    // should never happen
+    mi_free(block);
+    return NULL;
+  }
+  uint8_t* guard_page = (uint8_t*)block + block_size - os_page_size;
+  mi_assert_internal(_mi_is_aligned(guard_page, os_page_size));
+  if (segment->allow_decommit && _mi_is_aligned(guard_page, os_page_size)) {
+    _mi_os_protect(guard_page, os_page_size);
+  }
+  else {
+    _mi_warning_message("unable to set a guard page behind an object due to pinned memory (large OS pages?) (object %p of size %zu)\n", block, block_size);
+  }
+
+  // align pointer just in front of the guard page
+  size_t offset = block_size - os_page_size - obj_size;
+  mi_assert_internal(offset > sizeof(mi_block_t));
+  if (offset > MI_BLOCK_ALIGNMENT_MAX) {
+    // give up on placing it right in front of the guard page if the offset is too large for unalignment
+    offset = MI_BLOCK_ALIGNMENT_MAX;
+  }
+  void* p = (uint8_t*)block + offset;
+  mi_track_align(block, p, offset, obj_size);
+  mi_track_mem_defined(block, sizeof(mi_block_t));
+  return p;
 }

-static inline bool mi_heap_malloc_use_guarded(size_t size, bool has_huge_alignment) {
-  return (!has_huge_alignment                // guarded pages do not work with huge aligments at the moment
-          && _mi_option_get_fast(mi_option_debug_guarded_max) > 0  // guarded must be enabled
-          && (mi_heap_malloc_small_use_guarded(size)
-              || ((mi_good_size(size) & (_mi_os_page_size() - 1)) == 0))  // page-size multiple are always guarded so we can have a correct `mi_usable_size`.
-         );
-}
-
-static mi_decl_restrict void* mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
+mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
 {
   #if defined(MI_PADDING_SIZE)
   mi_assert(MI_PADDING_SIZE==0);
   #endif
   // allocate multiple of page size ending in a guard page
-  const size_t obj_size = _mi_align_up(size, MI_MAX_ALIGN_SIZE);  // ensure minimal alignment requirement
+  // ensure minimal alignment requirement?
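+  // block layout: [ tag word | .. | object | guard page ]
+  // (`bsize` below reserves room for the object plus the tag word; `req_size` adds one OS page that becomes the guard page)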
   const size_t os_page_size = _mi_os_page_size();
-  const size_t req_size = _mi_align_up(obj_size + os_page_size, os_page_size);
-  void* const block = _mi_malloc_generic(heap, req_size, zero, 0 /* huge_alignment */);
+  const size_t obj_size  = (mi_option_is_enabled(mi_option_guarded_precise) ? size : _mi_align_up(size, MI_MAX_ALIGN_SIZE));
+  const size_t bsize     = _mi_align_up(_mi_align_up(obj_size, MI_MAX_ALIGN_SIZE) + sizeof(mi_block_t), MI_MAX_ALIGN_SIZE);
+  const size_t req_size  = _mi_align_up(bsize + os_page_size, os_page_size);
+  mi_block_t* const block = (mi_block_t*)_mi_malloc_generic(heap, req_size, zero, 0 /* huge_alignment */);
   if (block==NULL) return NULL;
-  mi_page_t* page = _mi_ptr_page(block);
-  mi_segment_t* segment = _mi_page_segment(page);
-
-  const size_t block_size = mi_page_block_size(page);  // must use `block_size` to match `mi_free_local`
-  void* const guard_page = (uint8_t*)block + (block_size - os_page_size);
-  mi_assert_internal(_mi_is_aligned(guard_page, os_page_size));
-
-  // place block in front of the guard page
-  size_t offset = block_size - os_page_size - obj_size;
-  if (offset > MI_BLOCK_ALIGNMENT_MAX) {
-    // give up to place it right in front of the guard page if the offset is too large for unalignment
-    offset = MI_BLOCK_ALIGNMENT_MAX;
-  }
-  void* const p = (uint8_t*)block + offset;
-  mi_assert_internal(p>=block);
-
-  // set page flags
-  if (offset > 0) {
-    mi_page_set_has_aligned(page, true);
-  }
-
-  // set guard page
-  if (segment->allow_decommit) {
-    mi_page_set_has_guarded(page, true);
-    _mi_os_protect(guard_page, os_page_size);
-  }
-  else {
-    _mi_warning_message("unable to set a guard page behind an object due to pinned memory (large OS pages?) (object %p of size %zu)\n", p, size);
-  }
+  void* const p = mi_block_ptr_set_guarded(block, obj_size);

   // stats
-  mi_track_malloc(p, size, zero);
-  #if MI_STAT>1
+  mi_track_malloc(p, size, zero);
   if (p != NULL) {
     if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
+    #if MI_STAT>1
     mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
+    #endif
+    _mi_stat_counter_increase(&heap->tld->stats.guarded_alloc_count, 1);
   }
-  #endif
   #if MI_DEBUG>3
   if (p != NULL && zero) {
     mi_assert_expensive(mi_mem_is_zero(p, size));
diff --git a/src/arena.c b/src/arena.c
index d2039623..686500b4 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -293,7 +293,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no
     bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
 {
   MI_UNUSED_RELEASE(alignment);
-  mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
+  mi_assert(alignment <= MI_SEGMENT_ALIGN);
   const size_t bcount = mi_block_count_of_size(size);
   const size_t arena_index = mi_arena_id_index(arena_id);
   mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
diff --git a/src/free.c b/src/free.c
index ad162915..f856da77 100644
--- a/src/free.c
+++ b/src/free.c
@@ -34,7 +34,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
   if mi_unlikely(mi_check_is_double_free(page, block)) return;
   mi_check_padding(page, block);
   if (track_stats) { mi_stat_free(page, block); }
-  #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN && !MI_DEBUG_GUARDED
+  #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN && !MI_GUARDED
   if (!mi_page_is_huge(page)) {  // huge page content may be already decommitted
     memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
   }
@@ -71,21 +71,30 @@ mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
   return (mi_block_t*)((uintptr_t)p - adjust);
 }

-// forward declaration for a MI_DEBUG_GUARDED build
-static void mi_block_unguard(mi_page_t* page, mi_block_t* block);
+// forward declaration for a MI_GUARDED build
+#if MI_GUARDED
+static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p);  // forward declaration
+static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, void* p) {
+  if (mi_block_ptr_is_guarded(block, p)) { mi_block_unguard(page, block, p); }
+}
+#else
+static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, void* p) {
+  MI_UNUSED(page); MI_UNUSED(block); MI_UNUSED(p);
+}
+#endif

 // free a local pointer (page parameter comes first for better codegen)
 static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
   MI_UNUSED(segment);
   mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
-  mi_block_unguard(page,block);
+  mi_block_check_unguard(page, block, p);
   mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
 }

 // free a pointer owned by another thread (page parameter comes first for better codegen)
 static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
   mi_block_t* const block = _mi_page_ptr_unalign(page, p);  // don't check `has_aligned` flag to avoid a race (issue #865)
-  mi_block_unguard(page, block);
+  mi_block_check_unguard(page, block, p);
   mi_free_block_mt(page, segment, block);
 }

@@ -102,17 +111,17 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
 {
   MI_UNUSED(msg);

-#if (MI_DEBUG>0)
-  if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) {
+  #if (MI_DEBUG>0)
+  if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
     _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
     return NULL;
   }
-#endif
+  #endif

   mi_segment_t* const segment = _mi_ptr_segment(p);
   if mi_unlikely(segment==NULL) return segment;

-#if (MI_DEBUG>0)
+  #if (MI_DEBUG>0)
   if mi_unlikely(!mi_is_in_heap_region(p)) {
   #if (MI_INTPTR_SIZE == 8 && defined(__linux__))
     if (((uintptr_t)p >> 40) != 0x7F) {  // linux tends to align large blocks above 0x7F000000000 (issue #640)
@@ -126,13 +135,13 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
       }
     }
   }
-#endif
-#if (MI_DEBUG>0 || MI_SECURE>=4)
+  #endif
+  #if (MI_DEBUG>0 || MI_SECURE>=4)
   if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
     _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
     return NULL;
   }
-#endif
+  #endif
   return segment;
 }

@@ -305,20 +314,19 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p
   const size_t size = mi_page_usable_size_of(page, block);
   const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
   mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
-  return (size - adjust);
+  const size_t aligned_size = (size - adjust);
+  #if MI_GUARDED
+  if (mi_block_ptr_is_guarded(block, p)) {
+    return aligned_size - _mi_os_page_size();
+  }
+  #endif
+  return aligned_size;
 }

 static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
   const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
   if mi_unlikely(segment==NULL) return 0;
   const mi_page_t* const page = _mi_segment_page_of(segment, p);
-  #if MI_DEBUG_GUARDED
-  if (mi_page_has_guarded(page)) {
-    const size_t bsize = mi_page_usable_aligned_size_of(page, p);
-    mi_assert_internal(bsize > _mi_os_page_size());
-    return (bsize > _mi_os_page_size() ? bsize - _mi_os_page_size() : bsize);
-  } else
-  #endif
   if mi_likely(!mi_page_has_aligned(page)) {
     const mi_block_t* block = (const mi_block_t*)p;
     return mi_page_usable_size_of(page, block);
@@ -543,23 +551,21 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
 #endif

-// Remove guard page when building with MI_DEBUG_GUARDED
-#if !MI_DEBUG_GUARDED
-static void mi_block_unguard(mi_page_t* page, mi_block_t* block) {
-  MI_UNUSED(page);
-  MI_UNUSED(block);
-  // do nothing
-}
-#else
-static void mi_block_unguard(mi_page_t* page, mi_block_t* block) {
-  if (mi_page_has_guarded(page)) {
-    const size_t bsize = mi_page_block_size(page);
-    const size_t psize = _mi_os_page_size();
-    mi_assert_internal(bsize > psize);
-    mi_assert_internal(_mi_page_segment(page)->allow_decommit);
-    void* gpage = (uint8_t*)block + (bsize - psize);
-    mi_assert_internal(_mi_is_aligned(gpage, psize));
-    _mi_os_unprotect(gpage, psize);
-  }
+// Remove guard page when building with MI_GUARDED
+#if MI_GUARDED
+static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p) {
+  MI_UNUSED(p);
+  mi_assert_internal(mi_block_ptr_is_guarded(block, p));
+  mi_assert_internal(mi_page_has_aligned(page));
+  mi_assert_internal((uint8_t*)p - (uint8_t*)block >= (ptrdiff_t)sizeof(mi_block_t));
+  mi_assert_internal(block->next == MI_BLOCK_TAG_GUARDED);
+
+  const size_t bsize = mi_page_block_size(page);
+  const size_t psize = _mi_os_page_size();
+  mi_assert_internal(bsize > psize);
+  mi_assert_internal(_mi_page_segment(page)->allow_decommit);
+  void* gpage = (uint8_t*)block + bsize - psize;
+  mi_assert_internal(_mi_is_aligned(gpage, psize));
+  _mi_os_unprotect(gpage, psize);
 }
 #endif
diff --git a/src/heap.c b/src/heap.c
index b3fda0f6..154d4b80 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -228,6 +228,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool
   heap->cookie = _mi_heap_random_next(heap) | 1;
   heap->keys[0] = _mi_heap_random_next(heap);
   heap->keys[1] = _mi_heap_random_next(heap);
+  _mi_heap_guarded_init(heap);
   // push on the thread local heaps list
   heap->next = heap->tld->heaps;
   heap->tld->heaps = heap;
@@ -381,8 +382,8 @@ void mi_heap_destroy(mi_heap_t* heap) {
   mi_assert(heap->no_reclaim);
   mi_assert_expensive(mi_heap_is_valid(heap));
   if (heap==NULL || !mi_heap_is_initialized(heap)) return;
-  #if MI_DEBUG_GUARDED
-  _mi_warning_message("'mi_heap_destroy' called but ignored as MI_DEBUG_GUARDED is enabled (heap at %p)\n", heap);
+  #if MI_GUARDED
+  // _mi_warning_message("'mi_heap_destroy' called but MI_GUARDED is enabled -- using `mi_heap_delete` instead (heap at %p)\n", heap);
   mi_heap_delete(heap);
   return;
   #else
diff --git a/src/init.c b/src/init.c
index a13edba6..ccaf9445 100644
--- a/src/init.c
+++ b/src/init.c
@@ -88,7 +88,7 @@ const mi_page_t _mi_page_empty = {
   { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
   { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
   { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
-  { 0, 0 } \
+  { 0, 0 }, { 0, 0 } \
   MI_STAT_COUNT_END_NULL()

@@ -125,6 +125,9 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   NULL,            // next
   false,           // can reclaim
   0,               // tag
+  #if MI_GUARDED
+  0, 0, 0, 0, 1,   // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
+  #endif
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY
 };
@@ -173,6 +176,9 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = {
   NULL,            // next heap
   false,           // can reclaim
   0,               // tag
+  #if MI_GUARDED
+  0, 0, 0, 0, 0,
+  #endif
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY
 };

@@ -181,6 +187,45 @@ bool _mi_process_is_initialized = false;  // set to `true` in `mi_process_init`.

 mi_stats_t _mi_stats_main = { MI_STATS_NULL };

+#if MI_GUARDED
+mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
+  heap->guarded_sample_seed = seed;
+  if (heap->guarded_sample_seed == 0) {
+    heap->guarded_sample_seed = _mi_heap_random_next(heap);
+  }
+  heap->guarded_sample_rate = sample_rate;
+  if (heap->guarded_sample_rate >= 1) {
+    heap->guarded_sample_seed = heap->guarded_sample_seed % heap->guarded_sample_rate;
+  }
+  heap->guarded_sample_count = heap->guarded_sample_seed;  // count down samples
+}
+
+mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
+  heap->guarded_size_min = min;
+  heap->guarded_size_max = (min > max ? min : max);
+}
+
+void _mi_heap_guarded_init(mi_heap_t* heap) {
+  mi_heap_guarded_set_sample_rate(heap,
+    (size_t)mi_option_get_clamp(mi_option_guarded_sample_rate, 0, LONG_MAX),
+    (size_t)mi_option_get(mi_option_guarded_sample_seed));
+  mi_heap_guarded_set_size_bound(heap,
+    (size_t)mi_option_get_clamp(mi_option_guarded_min, 0, LONG_MAX),
+    (size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) );
+}
+#else
+mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
+  MI_UNUSED(heap); MI_UNUSED(sample_rate); MI_UNUSED(seed);
+}
+
+mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
+  MI_UNUSED(heap); MI_UNUSED(min); MI_UNUSED(max);
+}
+void _mi_heap_guarded_init(mi_heap_t* heap) {
+  MI_UNUSED(heap);
+}
+#endif
+

 static void mi_heap_main_init(void) {
   if (_mi_heap_main.cookie == 0) {
@@ -196,6 +241,7 @@ static void mi_heap_main_init(void) {
     _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
     mi_lock_init(&mi_subproc_default.abandoned_os_lock);
     mi_lock_init(&mi_subproc_default.abandoned_os_visit_lock);
+    _mi_heap_guarded_init(&_mi_heap_main);
   }
 }

@@ -577,7 +623,7 @@ static void mi_detect_cpu_features(void) {
 }
 #else
 static void mi_detect_cpu_features(void) {
-  // nothing 
+  // nothing
 }
 #endif
diff --git a/src/options.c b/src/options.c
index ed1cf921..2a816096 100644
--- a/src/options.c
+++ b/src/options.c
@@ -47,7 +47,9 @@ typedef struct mi_option_desc_s {
 #define MI_OPTION(opt)                mi_option_##opt, #opt, NULL
 #define MI_OPTION_LEGACY(opt,legacy)  mi_option_##opt, #opt, #legacy

-// Some options can be set at build time for statically linked libraries (use `-DMI_EXTRA_CPPDEFS="opt1=val1;opt2=val2"`)
+// Some options can be set at build time for statically linked libraries
+// (use `-DMI_EXTRA_CPPDEFS="opt1=val1;opt2=val2"`)
+//
 // This is useful if we cannot pass them as environment variables
 // (and setting them programmatically would be too late)

@@ -100,14 +102,19 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(show_stats) },
   { MI_DEFAULT_VERBOSE, UNINIT, MI_OPTION(verbose) },

-  // the following options are experimental and not all combinations make sense.
-  { MI_DEFAULT_EAGER_COMMIT, UNINIT, MI_OPTION(eager_commit) },  // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
-  { MI_DEFAULT_ARENA_EAGER_COMMIT, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) },  // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux)
+  // some of the following options are experimental and not all combinations are allowed.
+  { MI_DEFAULT_EAGER_COMMIT,
+       UNINIT, MI_OPTION(eager_commit) },  // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
+  { MI_DEFAULT_ARENA_EAGER_COMMIT,
+       UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) },  // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux)
   { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) },  // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit)
-  { MI_DEFAULT_ALLOW_LARGE_OS_PAGES, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) },  // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
-  { MI_DEFAULT_RESERVE_HUGE_OS_PAGES, UNINIT, MI_OPTION(reserve_huge_os_pages) },  // per 1GiB huge pages
+  { MI_DEFAULT_ALLOW_LARGE_OS_PAGES,
+       UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) },  // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
+  { MI_DEFAULT_RESERVE_HUGE_OS_PAGES,
+       UNINIT, MI_OPTION(reserve_huge_os_pages) },  // per 1GiB huge pages
   {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) },  // reserve huge pages at node N
-  { MI_DEFAULT_RESERVE_OS_MEMORY, UNINIT, MI_OPTION(reserve_os_memory) },  // reserve N KiB OS memory in advance (use `option_get_size`)
+  { MI_DEFAULT_RESERVE_OS_MEMORY,
+       UNINIT, MI_OPTION(reserve_os_memory) },  // reserve N KiB OS memory in advance (use `option_get_size`)
   { 0, UNINIT, MI_OPTION(deprecated_segment_cache) },  // cache N segments per thread
   { 0, UNINIT, MI_OPTION(deprecated_page_reset) },  // reset page memory on free
   { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) },  // reset free page memory when a thread terminates
@@ -125,19 +132,26 @@ static mi_option_desc_t options[_mi_option_last] =
   { 32, UNINIT, MI_OPTION(max_warnings) },  // maximum warnings that are output
   { 10, UNINIT, MI_OPTION(max_segment_reclaim)},  // max. percentage of the abandoned segments to be reclaimed per try.
   { 0, UNINIT, MI_OPTION(destroy_on_exit)},  // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
-  { MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) },  // reserve memory N KiB at a time (=1GiB) (use `option_get_size`) 
+  { MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) },  // reserve memory N KiB at a time (=1GiB) (use `option_get_size`)
   { 10,  UNINIT, MI_OPTION(arena_purge_mult) },  // purge delay multiplier for arena's
   { 1,   UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
   { 1,   UNINIT, MI_OPTION(abandoned_reclaim_on_free) },  // reclaim an abandoned segment on a free
-  { MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) },  // 1 = do not use arena's for allocation (except if using specific arena id's) 
+  { MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) },  // 1 = do not use arena's for allocation (except if using specific arena id's)
   { 400, UNINIT, MI_OPTION(retry_on_oom) },  // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
-#if defined(MI_VISIT_ABANDONED) 
+#if defined(MI_VISIT_ABANDONED)
   { 1, INITIALIZED, MI_OPTION(visit_abandoned) },  // allow visiting heap blocks in abandoned segments; requires taking locks during reclaim.
 #else
-  { 0, UNINIT, MI_OPTION(visit_abandoned) }, 
+  { 0, UNINIT, MI_OPTION(visit_abandoned) },
 #endif
-  { 0, UNINIT, MI_OPTION(debug_guarded_min) },  // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects
-  { 0, UNINIT, MI_OPTION(debug_guarded_max) },  // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects
+  { 0, UNINIT, MI_OPTION(guarded_min) },  // only used when building with MI_GUARDED: minimal rounded object size for guarded objects
+  { MI_GiB, UNINIT, MI_OPTION(guarded_max) },  // only used when building with MI_GUARDED: maximal rounded object size for guarded objects
+  { 0, UNINIT, MI_OPTION(guarded_precise) },  // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0)
+#if MI_GUARDED
+  { 4000, UNINIT, MI_OPTION(guarded_sample_rate)},  // 1 out of N allocations in the min/max range will be guarded (=4000)
+#else
+  { 0, UNINIT, MI_OPTION(guarded_sample_rate)},
+#endif
+  { 0, UNINIT, MI_OPTION(guarded_sample_seed)},
   { 0, UNINIT, MI_OPTION(target_segments_per_thread) },  // abandon segments beyond this point, or 0 to disable.
 };

@@ -161,25 +175,25 @@ void _mi_options_init(void) {
   }
   mi_max_error_count = mi_option_get(mi_option_max_errors);
   mi_max_warning_count = mi_option_get(mi_option_max_warnings);
-  #if MI_DEBUG_GUARDED
-  if (mi_option_get(mi_option_debug_guarded_max) > 0) {
+  #if MI_GUARDED
+  if (mi_option_get(mi_option_guarded_sample_rate) > 0) {
     if (mi_option_is_enabled(mi_option_allow_large_os_pages)) {
       mi_option_disable(mi_option_allow_large_os_pages);
       _mi_warning_message("option 'allow_large_os_pages' is disabled to allow for guarded objects\n");
     }
   }
-  _mi_verbose_message("guarded build: %s\n", mi_option_get(mi_option_debug_guarded_max) > 0 ? "enabled" : "disabled");
+  _mi_verbose_message("guarded build: %s\n", mi_option_get(mi_option_guarded_sample_rate) > 0 ? "enabled" : "disabled");
   #endif
 }

 long _mi_option_get_fast(mi_option_t option) {
   mi_assert(option >= 0 && option < _mi_option_last);
-  mi_option_desc_t* desc = &options[option]; 
+  mi_option_desc_t* desc = &options[option];
   mi_assert(desc->option == option);  // index should match the option
   //mi_assert(desc->init != UNINIT);
   return desc->value;
 }
- 
+

 mi_decl_nodiscard long mi_option_get(mi_option_t option) {
   mi_assert(option >= 0 && option < _mi_option_last);
@@ -214,11 +228,11 @@ void mi_option_set(mi_option_t option, long value) {
   desc->value = value;
   desc->init = INITIALIZED;
   // ensure min/max range; be careful to not recurse.
-  if (desc->option == mi_option_debug_guarded_min && _mi_option_get_fast(mi_option_debug_guarded_max) < value) {
-    mi_option_set(mi_option_debug_guarded_max, value);
+  if (desc->option == mi_option_guarded_min && _mi_option_get_fast(mi_option_guarded_max) < value) {
+    mi_option_set(mi_option_guarded_max, value);
   }
-  else if (desc->option == mi_option_debug_guarded_max && _mi_option_get_fast(mi_option_debug_guarded_min) > value) {
-    mi_option_set(mi_option_debug_guarded_min, value);
+  else if (desc->option == mi_option_guarded_max && _mi_option_get_fast(mi_option_guarded_min) > value) {
+    mi_option_set(mi_option_guarded_min, value);
   }
 }

@@ -554,7 +568,7 @@ static void mi_option_init(mi_option_desc_t* desc) {
       char* end = buf;
       long value = strtol(buf, &end, 10);
       if (mi_option_has_size_in_kib(desc->option)) {
-        // this option is interpreted in KiB to prevent overflow of `long` for large allocations 
+        // this option is interpreted in KiB to prevent overflow of `long` for large allocations
         // (long is 32-bit on 64-bit windows, which allows for 4TiB max.)
         size_t size = (value < 0 ? 0 : (size_t)value);
         bool overflow = false;
@@ -569,7 +583,7 @@ static void mi_option_init(mi_option_desc_t* desc) {
         value = (size > LONG_MAX ? LONG_MAX : (long)size);
       }
       if (*end == 0) {
-        mi_option_set(desc->option, value); 
+        mi_option_set(desc->option, value);
       }
       else {
         // set `init` first to avoid recursion through _mi_warning_message on mimalloc_verbose.
diff --git a/src/os.c b/src/os.c
index b794b4da..967f5663 100644
--- a/src/os.c
+++ b/src/os.c
@@ -11,16 +11,33 @@ terms of the MIT license. A copy of the license can be found in the file

 /* -----------------------------------------------------------
-  Initialization. 
+  Initialization.
----------------------------------------------------------- */
+#ifndef MI_DEFAULT_VIRTUAL_ADDRESS_BITS
+#if MI_INTPTR_SIZE < 8
+#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 32
+#else
+#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 48
+#endif
+#endif
+
+#ifndef MI_DEFAULT_PHYSICAL_MEMORY
+#if MI_INTPTR_SIZE < 8
+#define MI_DEFAULT_PHYSICAL_MEMORY 4*MI_GiB
+#else
+#define MI_DEFAULT_PHYSICAL_MEMORY 32*MI_GiB
+#endif
+#endif

 static mi_os_mem_config_t mi_os_mem_config = {
-  4096,   // page size
-  0,      // large page size (usually 2MiB)
-  4096,   // allocation granularity
-  true,   // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
-  false,  // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
-  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
+  4096,   // page size
+  0,      // large page size (usually 2MiB)
+  4096,   // allocation granularity
+  MI_DEFAULT_PHYSICAL_MEMORY,
+  MI_DEFAULT_VIRTUAL_ADDRESS_BITS,
+  true,   // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
+  false,  // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
+  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
 };

 bool _mi_os_has_overcommit(void) {
@@ -76,9 +93,9 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats
   aligned hinting
-------------------------------------------------------------- */

-// On 64-bit systems, we can do efficient aligned allocation by using
-// the 2TiB to 30TiB area to allocate those. We assume we have
-// at least 48 bits of virtual address space on 64-bit systems (but see issue #939)
+// On systems with enough virtual address bits, we can do efficient aligned allocation by using
+// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
+// space (64TiB) we use this technique. (but see issue #939)
 #if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
 static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;

@@ -96,6 +113,7 @@ static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
 void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
   if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
+  if (mi_os_mem_config.virtual_address_bits < 46) return NULL;  // < 64TiB virtual address space
   size = _mi_align_up(size, MI_SEGMENT_SIZE);
   if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
   #if (MI_SECURE>0)
@@ -181,7 +199,8 @@ void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) {
 -------------------------------------------------------------- */

 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
+// Also `hint_addr` is a hint and may be ignored.
+static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(is_zero != NULL);
   mi_assert_internal(is_large != NULL);
@@ -190,9 +209,9 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo
   if (try_alignment == 0) { try_alignment = 1; }  // avoid 0 to ensure there will be no divide by zero when aligning
   *is_zero = false;
   void* p = NULL;
-  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
+  int err = _mi_prim_alloc(hint_addr, size, try_alignment, commit, allow_large, is_large, is_zero, &p);
   if (err != 0) {
-    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
+    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large);
   }

   MI_UNUSED(tld_stats);
@@ -212,6 +231,10 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo
   return p;
 }

+static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
+  return mi_os_prim_alloc_at(NULL, size, try_alignment, commit, allow_large, is_large, is_zero, tld_stats);
+}
+
 // Primitive aligned allocation from the OS.
 // This function guarantees the allocated memory is aligned.
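A worked reading of the new 46-bit gate in `_mi_os_get_aligned_hint` above (the arithmetic follows from the 2TiB..30TiB hint area; the quoted line is from the hunk itself):

  // 39 address bits (SV39, see the CMakeLists.txt change) -> 2^39 = 512GiB: the hint area does not even fit, so no hints
  // 48 address bits (the 64-bit default)                  -> 2^48 = 256TiB: aligned hints are used
  if (mi_os_mem_config.virtual_address_bits < 46) return NULL;  // require at least 2^46 = 64TiB of virtual address space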
@@ -235,7 +258,9 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
   }
   else {
     // if not aligned, free it, overallocate, and unmap around it
+    #if !MI_TRACK_ASAN
     _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
+    #endif
     mi_os_prim_free(p, size, commit, stats);
     if (size >= (SIZE_MAX - alignment)) return NULL;  // overflow
     const size_t over_size = size + alignment;
@@ -261,7 +286,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
     p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
     if (p == NULL) return NULL;
 
-    // and selectively unmap parts around the over-allocated area.
+    // and selectively unmap parts around the over-allocated area.
     void* aligned_p = mi_align_up_ptr(p, alignment);
     size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
     size_t mid_size = _mi_align_up(size, _mi_os_page_size());
diff --git a/src/page.c b/src/page.c
index 5671c7d4..6ae4f172 100644
--- a/src/page.c
+++ b/src/page.c
@@ -436,9 +436,6 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
 
   // no more aligned blocks in here
   mi_page_set_has_aligned(page, false);
-  #if MI_DEBUG_GUARDED
-  mi_page_set_has_guarded(page, false);
-  #endif
 
   mi_heap_t* heap = mi_page_heap(page);
@@ -467,9 +464,6 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
   mi_assert_internal(mi_page_all_free(page));
 
   mi_page_set_has_aligned(page, false);
-  #if MI_DEBUG_GUARDED
-  mi_page_set_has_guarded(page, false);
-  #endif
 
   // don't retire too often..
   // (or we end up retiring and re-allocating most of the time)
diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c
index 944c0cb4..82147de7 100644
--- a/src/prim/emscripten/prim.c
+++ b/src/prim/emscripten/prim.c
@@ -71,8 +71,8 @@ int _mi_prim_free(void* addr, size_t size) {
 extern void* emmalloc_memalign(size_t alignment, size_t size);
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
-  MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit);
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+  MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit); MI_UNUSED(hint_addr);
   *is_large = false;
   // TODO: Track the highest address ever seen; first uses of it are zeroes.
   //       That assumes no one else uses sbrk but us (they could go up,
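[editor note] The `mi_os_prim_alloc_aligned` fallback touched above works by over-allocating and trimming, which only returns the excess immediately on systems that allow partial frees (mmap-style; on Windows the span is handled differently, as the config comment notes). A sketch of the trim logic under mmap semantics; `os_alloc`/`os_free` are hypothetical stand-ins for the prim layer, not real mimalloc functions:

```c
#include <stdint.h>
#include <stddef.h>

// Hypothetical stand-ins for the OS primitives:
extern uint8_t* os_alloc(size_t size);
extern void     os_free(void* p, size_t size);

// Allocate size+alignment, locate the aligned block inside, and unmap the
// unused head and tail around it (assumes partial frees are allowed).
static void* alloc_aligned_by_overallocation(size_t size, size_t alignment) {
  if (size >= SIZE_MAX - alignment) return NULL;  // guard the addition
  const size_t over_size = size + alignment;
  uint8_t* p = os_alloc(over_size);
  if (p == NULL) return NULL;
  uint8_t* aligned_p =
    (uint8_t*)(((uintptr_t)p + alignment - 1) & ~((uintptr_t)alignment - 1));
  const size_t pre_size  = (size_t)(aligned_p - p);       // unused head
  const size_t post_size = over_size - pre_size - size;   // unused tail
  if (pre_size  > 0) os_free(p, pre_size);                // unmap before block
  if (post_size > 0) os_free(aligned_p + size, post_size);// unmap after block
  return aligned_p;  // aligned to `alignment` by construction
}
```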
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index a6628fb7..6c224cb0 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -139,6 +139,12 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   if (psize > 0) {
     config->page_size = (size_t)psize;
     config->alloc_granularity = (size_t)psize;
+    #if defined(_SC_PHYS_PAGES)
+    long pphys = sysconf(_SC_PHYS_PAGES);
+    if (pphys > 0 && (size_t)pphys < (SIZE_MAX/(size_t)psize)) {
+      config->physical_memory = (size_t)pphys * (size_t)psize;
+    }
+    #endif
   }
   config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
   config->has_overcommit = unix_detect_overcommit();
@@ -351,14 +357,14 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
 }
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
 
   *is_zero = true;
   int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
-  *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
+  *addr = unix_mmap(hint_addr, size, try_alignment, protect_flags, false, allow_large, is_large);
   return (*addr != NULL ? 0 : errno);
 }
 
diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c
index 5d7a8132..e1e7de5e 100644
--- a/src/prim/wasi/prim.c
+++ b/src/prim/wasi/prim.c
@@ -119,8 +119,8 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) {
 }
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
-  MI_UNUSED(allow_large); MI_UNUSED(commit);
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+  MI_UNUSED(allow_large); MI_UNUSED(commit); MI_UNUSED(hint_addr);
   *is_large = false;
   *is_zero = false;
   *addr = mi_prim_mem_grow(size, try_alignment);
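[editor note] The unix hunk above feeds the new `config->physical_memory` field (defaulting to `MI_DEFAULT_PHYSICAL_MEMORY`) from `sysconf`. A standalone sketch of the same query, including the guard that keeps pages × page-size from overflowing `size_t`:

```c
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

// Query total physical memory on unix-like systems; returns 0 if unknown.
static size_t get_physical_memory(void) {
  long psize = sysconf(_SC_PAGESIZE);
  #if defined(_SC_PHYS_PAGES)
  long pphys = sysconf(_SC_PHYS_PAGES);
  if (psize > 0 && pphys > 0 && (size_t)pphys < (SIZE_MAX / (size_t)psize)) {
    return (size_t)pphys * (size_t)psize;   // cannot overflow due to the check
  }
  #endif
  return 0;
}

int main(void) {
  printf("physical memory: %zu MiB\n", get_physical_memory() / (1024*1024));
  return 0;
}
```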
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 385354fc..1d3d6f41 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -118,6 +118,18 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   GetSystemInfo(&si);
   if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
   if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; }
+  // get virtual address bits
+  if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
+    const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
+    config->virtual_address_bits = vbits;
+  }
+  // get physical memory
+  ULONGLONG memInKiB = 0;
+  if (GetPhysicallyInstalledSystemMemory(&memInKiB)) {
+    if (memInKiB > 0 && memInKiB < (SIZE_MAX / MI_KiB)) {
+      config->physical_memory = memInKiB * MI_KiB;
+    }
+  }
   // get the VirtualAlloc2 function
   HINSTANCE  hDll;
   hDll = LoadLibrary(TEXT("kernelbase.dll"));
@@ -191,7 +203,7 @@ static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_ali
   }
   #endif
   // on modern Windows try use VirtualAlloc2 for aligned allocation
-  if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
+  if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
     MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
     reqs.Alignment = try_alignment;
     MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
@@ -279,14 +291,14 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW
   return p;
 }
 
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
   *is_zero = true;
   int flags = MEM_RESERVE;
   if (commit) { flags |= MEM_COMMIT; }
-  *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
+  *addr = win_virtual_alloc(hint_addr, size, try_alignment, flags, false, allow_large, is_large);
   return (*addr != NULL ? 0 : (int)GetLastError());
 }
 
@@ -617,8 +629,8 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
     _mi_process_done();
   }
   else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) {
-    _mi_thread_done(NULL);
-  }
+    _mi_thread_done(NULL);
+  }
 }
 
@@ -681,7 +693,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
   #pragma data_seg()
   #pragma data_seg(".CRT$XLY")
   PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach };
-  #pragma data_seg()
+  #pragma data_seg()
 #endif
 
 #if defined(__cplusplus)
@@ -695,13 +707,13 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
   MI_UNUSED(heap);
 }
 
-#else // deprecated: statically linked, use fiber api
+#else // deprecated: statically linked, use fiber api
 
 #if defined(_MSC_VER)  // on clang/gcc use the constructor attribute (in `src/prim/prim.c`)
 // MSVC: use data section magic for static libraries
 // See <https://www.codeguru.com/cpp/misc/misc/applicationcontrol/article.php/c6945/Running-Code-Before-and-After-Main.htm>
 #define MI_PRIM_HAS_PROCESS_ATTACH  1
-
+
 static int mi_process_attach(void) {
   mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL);
   atexit(&_mi_process_done);
@@ -754,9 +766,9 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
 }
 #endif
 
-// ----------------------------------------------------
+// ----------------------------------------------------
 // Communicate with the redirection module on Windows
-// ----------------------------------------------------
+// ----------------------------------------------------
 #if defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT)
 
 #define MI_PRIM_HAS_ALLOCATOR_INIT 1
diff --git a/src/stats.c b/src/stats.c
index a9364027..a2d97e94 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -119,6 +119,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
   mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1);
   mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
   mi_stat_counter_add(&stats->large_count, &src->large_count, 1);
+  mi_stat_counter_add(&stats->guarded_alloc_count, &src->guarded_alloc_count, 1);
 #if MI_STAT>1
   for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
     if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) {
@@ -345,6 +346,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
   mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
   mi_stat_counter_print(&stats->reset_calls, "resets", out, arg);
   mi_stat_counter_print(&stats->purge_calls, "purges", out, arg);
+  mi_stat_counter_print(&stats->guarded_alloc_count, "guarded", out, arg);
   mi_stat_print(&stats->threads, "threads", -1, out, arg);
   mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
   _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count());
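[editor note] In the Windows hunk above, `config->virtual_address_bits` is derived as `MI_INTPTR_BITS - mi_clz(lpMaximumApplicationAddress)`, i.e. the bit width of the highest user-mode address. A portable sketch of that computation, using a loop in place of a count-leading-zeros intrinsic; the example addresses are typical values, not guarantees:

```c
#include <stdint.h>
#include <assert.h>

// Bit width of the highest usable address == usable virtual address bits.
static unsigned va_bits_from_max_address(uint64_t max_app_addr) {
  unsigned bits = 0;
  while (max_app_addr != 0) { bits++; max_app_addr >>= 1; }
  return bits;  // equals (word size - clz(x)) for nonzero x
}

int main(void) {
  assert(va_bits_from_max_address(0x00007FFFFFFEFFFFULL) == 47);  // typical 64-bit Windows
  assert(va_bits_from_max_address(0x7FFEFFFFULL) == 31);          // classic 2GiB user space
  return 0;
}
```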
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 07af1090..b2b6ee20 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -20,12 +20,9 @@ static void test_reserved(void);
 static void negative_stat(void);
 static void alloc_huge(void);
 static void test_heap_walk(void);
-<<<<<<< HEAD
 static void test_heap_arena(void);
 static void test_align(void);
-=======
 static void test_canary_leak(void);
->>>>>>> dev
 // static void test_large_pages(void);
 
 int main() {
diff --git a/test/main-override.cpp b/test/main-override.cpp
index 5e8b6f82..fc9c3f22 100644
--- a/test/main-override.cpp
+++ b/test/main-override.cpp
@@ -11,7 +11,7 @@
 #include
 #include
-#include
+//#include
 #include
 
 #ifdef _WIN32
@@ -35,21 +35,23 @@ static void test_mt_shutdown();
 static void large_alloc(void);          // issue #363
 static void fail_aslr();                // issue #372
 static void tsan_numa_test();           // issue #414
-static void strdup_test();              // issue #445
+static void strdup_test();              // issue #445
 static void bench_alloc_large(void);    // issue #xxx
 //static void test_large_migrate(void); // issue #691
 static void heap_thread_free_huge();
 static void test_std_string();          // issue #697
 static void test_thread_local();        // issue #944
-
+// static void test_mixed0();           // issue #942
+static void test_mixed1();              // issue #942
 static void test_stl_allocators();
 
 int main() {
   // mi_stats_reset();  // ignore earlier allocations
+  test_mixed1();
   //test_std_string();
-  test_thread_local();
+  //test_thread_local();
   // heap_thread_free_huge();
   /*
   heap_thread_free_huge();
@@ -65,10 +67,9 @@ int main() {
   // test_stl_allocators();
   // test_mt_shutdown();
   // test_large_migrate();
-
+
   //fail_aslr();
-  // bench_alloc_large();
-  // mi_stats_print(NULL);
+  mi_stats_print(NULL);
   return 0;
 }
 
@@ -187,6 +188,53 @@ static void test_stl_allocators() {
 #endif
 }
 
+#if 0
+#include <memory>
+#include <thread>
+#include <vector>
+#include <atomic>
+#include <chrono>
+#include <iostream>
+
+static void test_mixed0() {
+  std::vector<std::unique_ptr<std::size_t>> numbers(1024 * 1024 * 100);
+  std::vector<std::thread> threads(1);
+
+  std::atomic<std::size_t> index{};
+
+  auto start = std::chrono::system_clock::now();
+
+  for (auto& thread : threads) {
+    thread = std::thread{[&index, &numbers]() {
+      while (true) {
+        auto i = index.fetch_add(1, std::memory_order_relaxed);
+        if (i >= numbers.size()) return;
+
+        numbers[i] = std::make_unique<std::size_t>(i);
+      }
+    }};
+  }
+
+  for (auto& thread : threads) thread.join();
+
+  auto end = std::chrono::system_clock::now();
+
+  auto duration =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+  std::cout << "Running on " << threads.size() << " threads took " << duration
+            << std::endl;
+}
+#endif
+
+void asd() {
+  void* p = malloc(128);
+  free(p);
+}
+
+static void test_mixed1() {
+  std::thread thread(asd);
+  thread.join();
+}
+
 #if 0
 // issue #691
 static char* cptr;
diff --git a/test/test-api-fill.c b/test/test-api-fill.c
index 3baee83d..eebbd394 100644
--- a/test/test-api-fill.c
+++ b/test/test-api-fill.c
@@ -271,7 +271,7 @@ int main(void) {
     mi_free(p);
   };
 
-  #if !(MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_DEBUG_GUARDED)
+  #if !(MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_GUARDED)
   CHECK_BODY("fill-freed-small") {
     size_t malloc_size = MI_SMALL_SIZE_MAX / 2;
     uint8_t* p = (uint8_t*)mi_malloc(malloc_size);
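[editor note] The test-api-fill exclusion above exists because guarded allocations are placed against a guard page rather than fill-patterned like regular debug blocks. For reference, a sketch of driving the renamed options at runtime; this assumes a library built with `-DMI_GUARDED=ON`, and the size thresholds are illustrative:

```c
#include <mimalloc.h>

int main(void) {
  // sample objects between 16 bytes and 1 KiB for guard pages
  mi_option_set(mi_option_guarded_min, 16);
  mi_option_set(mi_option_guarded_max, 1024);
  void* p = mi_malloc(512);  // eligible: may get a guard page behind it
  mi_free(p);                // an overflow past the block end would fault
  mi_stats_print(NULL);      // includes the new "guarded" counter
  return 0;
}
```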
diff --git a/test/test-stress.c b/test/test-stress.c
index caf18798..0e8b45a2 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -22,19 +22,22 @@ terms of the MIT license.
 #include
 #include
 
+// #define MI_GUARDED
+// #define USE_STD_MALLOC
+
 // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
 //
 // argument defaults
 #if defined(MI_TSAN)            // with thread-sanitizer reduce the threads to test within the azure pipeline limits
-static int THREADS = 8;
+static int THREADS = 8;
 static int SCALE   = 25;
 static int ITER    = 400;
 #elif defined(MI_UBSAN)         // with undefined behavious sanitizer reduce parameters to stay within the azure pipeline limits
-static int THREADS = 8;
+static int THREADS = 8;
 static int SCALE   = 25;
 static int ITER    = 20;
-#elif defined(MI_DEBUG_GUARDED) // with debug guard pages reduce parameters to stay within the azure pipeline limits
-static int THREADS = 8;
+#elif defined(xMI_GUARDED)      // with debug guard pages reduce parameters to stay within the azure pipeline limits
+static int THREADS = 8;
 static int SCALE   = 10;
 static int ITER    = 10;
 #else
@@ -47,16 +50,11 @@ static int ITER    = 50;  // N full iterations destructing and re-creating a
 
 #define STRESS   // undefine for leak test
 
-#ifndef NDEBUG
-#define HEAP_WALK   // walk the heap objects?
-#endif
-
 static bool   allow_large_objects = true;   // allow very large objects? (set to `true` if SCALE>100)
 static size_t use_one_size = 0;             // use single object size of `N * sizeof(uintptr_t)`?
 
 static bool   main_participates = false;    // main thread participates as a worker too
 
-// #define USE_STD_MALLOC
 #ifdef USE_STD_MALLOC
 #define custom_calloc(n,s)    calloc(n,s)
 #define custom_realloc(p,s)   realloc(p,s)
@@ -66,6 +64,9 @@ static bool   main_participates = false;    // main thread participates as a
 #define custom_calloc(n,s)    mi_calloc(n,s)
 #define custom_realloc(p,s)   mi_realloc(p,s)
 #define custom_free(p)        mi_free(p)
+#ifndef NDEBUG
+#define HEAP_WALK   // walk the heap objects?
+#endif
 #endif
 
 // transfer pointer between threads
@@ -220,7 +221,7 @@ static void test_stress(void) {
   uintptr_t r = rand();
   for (int n = 0; n < ITER; n++) {
     run_os_threads(THREADS, &stress);
-    #ifndef NDEBUG
+    #if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
     // switch between arena and OS allocation for testing
     mi_option_set_enabled(mi_option_disallow_arena_alloc, (n%2)==1);
     #endif
@@ -270,7 +271,7 @@ int main(int argc, char** argv) {
   #ifdef HEAP_WALK
   mi_option_enable(mi_option_visit_abandoned);
   #endif
-  #ifndef NDEBUG
+  #if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
   mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
   #endif
   #ifndef USE_STD_MALLOC