diff --git a/contrib/docker/alpine-arm32v7/Dockerfile b/contrib/docker/alpine-arm32v7/Dockerfile
index f74934fb..daa60f50 100644
--- a/contrib/docker/alpine-arm32v7/Dockerfile
+++ b/contrib/docker/alpine-arm32v7/Dockerfile
@@ -1,6 +1,6 @@
 # install from an image
 # download first an appropriate tar.gz image into the current directory
-# from:
+# from
 FROM scratch

 # Substitute the image name that was downloaded
diff --git a/contrib/docker/alpine-x86/Dockerfile b/contrib/docker/alpine-x86/Dockerfile
new file mode 100644
index 00000000..a0f76c17
--- /dev/null
+++ b/contrib/docker/alpine-x86/Dockerfile
@@ -0,0 +1,28 @@
+# install from an image
+# download first an appropriate tar.gz image into the current directory
+# from
+FROM scratch
+
+# Substitute the image name that was downloaded
+ADD alpine-minirootfs-20250108-x86.tar.gz /
+
+# Install tools
+RUN apk add build-base make cmake
+RUN apk add git
+RUN apk add vim
+
+RUN mkdir -p /home/dev
+WORKDIR /home/dev
+
+# Get mimalloc
+RUN git clone https://github.com/microsoft/mimalloc -b dev2
+RUN mkdir -p mimalloc/out/release
+RUN mkdir -p mimalloc/out/debug
+
+# Build mimalloc debug
+WORKDIR /home/dev/mimalloc/out/debug
+RUN cmake ../.. -DMI_DEBUG_FULL=ON
+# RUN make -j
+# RUN make test
+
+CMD ["/bin/sh"]
diff --git a/include/mimalloc.h b/include/mimalloc.h
index c4a4eb19..a2268850 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -411,7 +411,8 @@ typedef enum mi_option_e {
   mi_option_max_vabits,                 // max user space virtual address bits to consider (=48)
   mi_option_pagemap_commit,             // commit the full pagemap (to always catch invalid pointer uses) (=0)
   mi_option_page_commit_on_demand,      // commit page memory on-demand
-  mi_option_page_reclaim_max,           // don't reclaim pages if we already own N pages (in that size class) (=16)
+  mi_option_page_max_reclaim,           // don't reclaim pages of the same originating heap if we already own N pages (in that size class) (=-1 (unlimited))
+  mi_option_page_cross_thread_max_reclaim,  // don't reclaim pages across threads if we already own N pages (in that size class) (=16)
   _mi_option_last,
   // legacy option names
   mi_option_large_os_pages = mi_option_allow_large_os_pages,
diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h
index 0c62cd3b..371cb7ce 100644
--- a/include/mimalloc/bits.h
+++ b/include/mimalloc/bits.h
@@ -120,6 +120,7 @@ typedef int32_t mi_ssize_t;
 #define MI_MAX_VABITS (32)
 #endif

+
 // use a flat page-map (or a 2-level one)
 #ifndef MI_PAGE_MAP_FLAT
 #if MI_MAX_VABITS <= 40 && !defined(__APPLE__)
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index a5ab8162..9c5eb362 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -200,6 +200,7 @@ void _mi_page_map_register(mi_page_t* page);
 void _mi_page_map_unregister(mi_page_t* page);
 void _mi_page_map_unregister_range(void* start, size_t size);
 mi_page_t* _mi_safe_ptr_page(const void* p);
+void _mi_page_map_unsafe_destroy(void);

 // "page.c"
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index ab085c8b..03ebe7dd 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -120,7 +120,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);

 // Is this thread part of a thread pool?
 bool _mi_prim_thread_is_in_threadpool(void);

-
+#define MI_WIN_USE_FIXED_TLS 1
 //-------------------------------------------------------------------
 // Access to TLS (thread local storage) slots.
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index d5df5f4a..86aeb07f 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -471,7 +471,6 @@ struct mi_heap_s {
   size_t guarded_size_min;            // minimal size for guarded objects
   size_t guarded_size_max;            // maximal size for guarded objects
   size_t guarded_sample_rate;         // sample rate (set to 0 to disable guarded pages)
-  size_t guarded_sample_seed;         // starting sample count
   size_t guarded_sample_count;        // current sample count (counting down to 0)
   #endif
   mi_page_t* pages_free_direct[MI_PAGES_DIRECT];  // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
diff --git a/src/arena.c b/src/arena.c
index 32d1c338..95cc8d63 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1497,7 +1497,7 @@ static void mi_debug_show_arenas_ex(bool show_pages, bool narrow) mi_attr_noexce
     if (arena == NULL) break;
     mi_assert(arena->subproc == subproc);
     // slice_total += arena->slice_count;
-    _mi_raw_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""), arena->subproc);
+    _mi_raw_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, (size_t)(mi_size_of_slices(arena->slice_count)/MI_MiB), (arena->memid.is_pinned ? ", pinned" : ""), arena->subproc);
     //if (show_inuse) {
     //  free_total += mi_debug_show_bbitmap("in-use slices", arena->slice_count, arena->slices_free, true, NULL);
     //}
diff --git a/src/free.c b/src/free.c
index b892803c..f4f8666b 100644
--- a/src/free.c
+++ b/src/free.c
@@ -148,7 +148,7 @@ static inline mi_page_t* mi_validate_ptr_page(const void* p, const char* msg)
   }
   mi_page_t* page = _mi_safe_ptr_page(p);
   if (p != NULL && page == NULL) {
-    _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
+    _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
   }
   return page;
   #else
@@ -163,7 +163,7 @@ void mi_free(void* p) mi_attr_noexcept
   mi_page_t* const page = mi_validate_ptr_page(p,"mi_free");
   if mi_unlikely(page==NULL) return;  // page will be NULL if p==NULL
   mi_assert_internal(p!=NULL && page!=NULL);
-
+
   const mi_threadid_t xtid = (_mi_prim_thread_id() ^ mi_page_xthread_id(page));
   if mi_likely(xtid == 0) {  // `tid == mi_page_thread_id(page) && mi_page_flags(page) == 0`
     // thread-local, aligned, and not a full page
@@ -191,10 +191,11 @@ void mi_free(void* p) mi_attr_noexcept
 // Multi-threaded Free (`_mt`)
 // ------------------------------------------------------
 static bool mi_page_unown_from_free(mi_page_t* page, mi_block_t* mt_free);
-static inline bool mi_page_queue_len_is_atmost( mi_heap_t* heap, size_t block_size, size_t atmost) {
+static inline bool mi_page_queue_len_is_atmost( mi_heap_t* heap, size_t block_size, long atmost) {
+  if (atmost < 0) return true;  // unlimited
   mi_page_queue_t* const pq = mi_page_queue(heap,block_size);
   mi_assert_internal(pq!=NULL);
-  return (pq->count <= atmost);
+  return (pq->count <= (size_t)atmost);
   /*
   for(mi_page_t* p = pq->first; p!=NULL; p = p->next, atmost--) {
     if (atmost == 0) { return false; }
@@ -219,7 +220,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t*
   // 1. free if the page is free now (this is updated by `_mi_page_free_collect_partly`)
   if (mi_page_all_free(page))
   {
-    // first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish)
+    // first remove it from the abandoned pages in the arena (if mapped, this might wait for any readers to finish)
     _mi_arenas_page_unabandon(page);
     // we can free the page directly
     _mi_arenas_page_free(page);
@@ -242,17 +243,20 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t*
       heap = _mi_heap_by_tag(heap, page->heap_tag);
     }
   }
-  // can we reclaim into this heap?
-  if (heap != NULL && heap->allow_page_reclaim) {
-    const long reclaim_max = _mi_option_get_fast(mi_option_page_reclaim_max);
-    if ((heap == page->heap && mi_page_queue_len_is_atmost(heap, page->block_size, reclaim_max)) ||  // only reclaim if we were the originating heap, and we have at most N pages already
-        (reclaim_on_free == 1 &&               // OR if the reclaim across heaps is allowed
-         !mi_page_is_used_at_frac(page, 8) &&  // and the page is not too full
-         !heap->tld->is_in_threadpool &&       // and not part of a threadpool
-         _mi_arena_memid_is_suitable(page->memid, heap->exclusive_arena))  // and the memory is suitable
+  // can we reclaim into this heap?
+  if (heap != NULL && heap->allow_page_reclaim)
+  {
+    if ((heap == page->heap &&   // always reclaim if we were the originating heap (todo: maybe not if in a threadpool?)
+         mi_page_queue_len_is_atmost(heap, page->block_size, _mi_option_get_fast(mi_option_page_max_reclaim)))
+        ||  // OR:
+        (reclaim_on_free == 1 &&               // reclaim across heaps is allowed
+         mi_page_queue_len_is_atmost(heap, page->block_size, _mi_option_get_fast(mi_option_page_cross_thread_max_reclaim)) &&
+         !mi_page_is_used_at_frac(page,8) &&   // and the page is not too full
+         !heap->tld->is_in_threadpool &&       // and not part of a threadpool
+         _mi_arena_memid_is_suitable(page->memid, heap->exclusive_arena))  // and the memory is suitable
        )
     {
-      // first remove it from the abandoned pages in the arena -- this waits for any readers to finish
+      // first remove it from the abandoned pages in the arena -- this might wait for any readers to finish
       _mi_arenas_page_unabandon(page);
       _mi_heap_page_reclaim(heap, page);
       mi_heap_stat_counter_increase(heap, pages_reclaim_on_free, 1);
diff --git a/src/init.c b/src/init.c
index 892f4988..77630a50 100644
--- a/src/init.c
+++ b/src/init.c
@@ -132,7 +132,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   true,           // can eager abandon
   0,              // tag
   #if MI_GUARDED
-  0, 0, 0, 0, 1,  // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
+  0, 0, 0, 1,     // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
   #endif
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY,
@@ -171,7 +171,7 @@ mi_decl_cache_align mi_heap_t heap_main = {
   true,           // allow page abandon
   0,              // tag
   #if MI_GUARDED
-  0, 0, 0, 0, 0,
+  0, 0, 0, 0,
   #endif
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY,
@@ -193,15 +193,14 @@ mi_stats_t _mi_stats_main = { MI_STAT_VERSION, MI_STATS_NULL };

 #if MI_GUARDED
 mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
-  heap->guarded_sample_seed = seed;
-  if (heap->guarded_sample_seed == 0) {
-    heap->guarded_sample_seed = _mi_heap_random_next(heap);
-  }
   heap->guarded_sample_rate = sample_rate;
-  if (heap->guarded_sample_rate >= 1) {
-    heap->guarded_sample_seed = heap->guarded_sample_seed % heap->guarded_sample_rate;
+  heap->guarded_sample_count = sample_rate;   // count down samples
+  if (heap->guarded_sample_rate > 1) {
+    if (seed == 0) {
+      seed = _mi_heap_random_next(heap);
+    }
+    heap->guarded_sample_count = (seed % heap->guarded_sample_rate) + 1;  // start at random count between 1 and `sample_rate`
   }
-  heap->guarded_sample_count = 1 + heap->guarded_sample_seed;  // count down samples
 }

 mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
@@ -780,6 +779,7 @@ void mi_cdecl _mi_process_done(void) {
     mi_heap_collect(heap, true /* force */);
     _mi_heap_unsafe_destroy_all(heap);      // forcefully release all memory held by all heaps (of this thread only!)
     _mi_arenas_unsafe_destroy_all(heap->tld);
+    _mi_page_map_unsafe_destroy();
   }

   if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
diff --git a/src/options.c b/src/options.c
index f1d16d6b..5760ac5c 100644
--- a/src/options.c
+++ b/src/options.c
@@ -98,7 +98,15 @@ int mi_version(void) mi_attr_noexcept {
 #endif
 #endif

-// Static options
+#ifndef MI_DEFAULT_PAGE_MAX_RECLAIM
+#define MI_DEFAULT_PAGE_MAX_RECLAIM  (-1)   // unlimited
+#endif
+
+#ifndef MI_DEFAULT_PAGE_CROSS_THREAD_MAX_RECLAIM
+#define MI_DEFAULT_PAGE_CROSS_THREAD_MAX_RECLAIM  16
+#endif
+
+// Static options
 static mi_option_desc_t mi_options[_mi_option_last] =
 {
   // stable options
@@ -157,14 +165,17 @@ static mi_option_desc_t mi_options[_mi_option_last] =
        MI_OPTION_UNINIT, MI_OPTION(guarded_sample_rate)},                // 1 out of N allocations in the min/max range will be guarded (=4000)
   { 0,     MI_OPTION_UNINIT, MI_OPTION(guarded_sample_seed)},
   { 10000, MI_OPTION_UNINIT, MI_OPTION(generic_collect) },               // collect heaps every N (=10000) generic allocation calls
-  { 0,     MI_OPTION_UNINIT, MI_OPTION_LEGACY(page_reclaim_on_free, abandoned_reclaim_on_free) },// reclaim abandoned pages on a free: -1 = disable completely, 0 = only reclaim into the originating heap, 1 = reclaim on free across heaps
+  { 0,     MI_OPTION_UNINIT, MI_OPTION_LEGACY(page_reclaim_on_free, abandoned_reclaim_on_free) },// reclaim abandoned (small) pages on a free: -1 = disable completely, 0 = only reclaim into the originating heap, 1 = reclaim on free across heaps
   { 2,     MI_OPTION_UNINIT, MI_OPTION(page_full_retain) },              // number of (small) pages to retain in the free page queues
   { 4,     MI_OPTION_UNINIT, MI_OPTION(page_max_candidates) },           // max search to find a best page candidate
   { 0,     MI_OPTION_UNINIT, MI_OPTION(max_vabits) },                    // max virtual address space bits
   { MI_DEFAULT_PAGEMAP_COMMIT,
            MI_OPTION_UNINIT, MI_OPTION(pagemap_commit) },                // commit the full pagemap upfront?
   { 0,     MI_OPTION_UNINIT, MI_OPTION(page_commit_on_demand) },         // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
-  { 16,    MI_OPTION_UNINIT, MI_OPTION(page_reclaim_max) },              // don't reclaim pages if we already own N pages (in that size class)
+  { MI_DEFAULT_PAGE_MAX_RECLAIM,
+           MI_OPTION_UNINIT, MI_OPTION(page_max_reclaim) },              // don't reclaim (small) pages of the same originating heap if we already own N pages in that size class
+  { MI_DEFAULT_PAGE_CROSS_THREAD_MAX_RECLAIM,
+           MI_OPTION_UNINIT, MI_OPTION(page_cross_thread_max_reclaim) }, // don't reclaim (small) pages across threads if we already own N pages in that size class
 };

 static void mi_option_init(mi_option_desc_t* desc);
diff --git a/src/page-map.c b/src/page-map.c
index c286d87e..c8686924 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -71,6 +71,17 @@ bool _mi_page_map_init(void) {
   return true;
 }

+void _mi_page_map_unsafe_destroy(void) {
+  mi_assert_internal(_mi_page_map != NULL);
+  if (_mi_page_map == NULL) return;
+  _mi_os_free(mi_page_map_memid.mem.os.base, mi_page_map_memid.mem.os.size, mi_page_map_memid);
+  _mi_page_map = NULL;
+  mi_page_map_commit = NULL;
+  mi_page_map_max_address = NULL;
+  mi_page_map_memid = _mi_memid_none();
+}
+
+
 static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
   // is the page map area that contains the page address committed?
   // we always set the commit bits so we can track what ranges are in-use.
@@ -163,11 +174,12 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att
 #define MI_PAGE_MAP_SUB_SIZE  (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))

 mi_decl_cache_align mi_page_t*** _mi_page_map;
+static size_t mi_page_map_count;
 static void* mi_page_map_max_address;
 static mi_memid_t mi_page_map_memid;
-
 static _Atomic(mi_bfield_t) mi_page_map_commit;

+static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx);
 static mi_page_t** mi_page_map_ensure_committed(size_t idx);
 static mi_page_t** mi_page_map_ensure_at(size_t idx);
 static inline void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);
@@ -184,10 +196,10 @@ bool _mi_page_map_init(void) {

   // Allocate the page map and commit bits
   mi_assert(MI_MAX_VABITS >= vbits);
   mi_page_map_max_address = (void*)(vbits >= MI_SIZE_BITS ? (SIZE_MAX - MI_ARENA_SLICE_SIZE + 1) : (MI_PU(1) << vbits));
-  const size_t page_map_count = (MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT));
-  mi_assert(page_map_count <= MI_PAGE_MAP_COUNT);
+  mi_page_map_count = (MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT));
+  mi_assert(mi_page_map_count <= MI_PAGE_MAP_COUNT);
   const size_t os_page_size = _mi_os_page_size();
-  const size_t page_map_size = _mi_align_up( page_map_count * sizeof(mi_page_t**), os_page_size);
+  const size_t page_map_size = _mi_align_up( mi_page_map_count * sizeof(mi_page_t**), os_page_size);
   const size_t reserve_size = page_map_size + os_page_size;
   const bool commit = page_map_size <= 64*MI_KiB || mi_option_is_enabled(mi_option_pagemap_commit) || _mi_os_has_overcommit();
@@ -206,14 +218,40 @@ bool _mi_page_map_init(void) {
   if (!mi_page_map_memid.initially_committed) {
     _mi_os_commit(&_mi_page_map[0], os_page_size, NULL);  // commit first part of the map
   }
-  _mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size);  // we reserved 2 sub maps at the end already
+  _mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size);  // we reserved a submap part at the end already
   if (!mi_page_map_memid.initially_committed) {
     _mi_os_commit(_mi_page_map[0], os_page_size, NULL);  // only first OS page
   }
+  if (!mi_page_map_memid.initially_zero) {               // initialize first addresses with NULL
+    _mi_memzero_aligned(_mi_page_map[0], os_page_size);
+  }
+
   mi_assert_internal(_mi_ptr_page(NULL)==NULL);
   return true;
 }

+void _mi_page_map_unsafe_destroy(void) {
+  mi_assert_internal(_mi_page_map != NULL);
+  if (_mi_page_map == NULL) return;
+  for (size_t idx = 1; idx < mi_page_map_count; idx++) {  // skip entry 0
+    // free all sub-maps
+    if (mi_page_map_is_committed(idx, NULL)) {
+      mi_page_t** sub = _mi_page_map[idx];
+      if (sub != NULL) {
+        mi_memid_t memid = _mi_memid_create_os(sub, MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), true, false, false);
+        _mi_os_free(memid.mem.os.base, memid.mem.os.size, memid);
+        _mi_page_map[idx] = NULL;
+      }
+    }
+  }
+  _mi_os_free(_mi_page_map, mi_page_map_memid.mem.os.size, mi_page_map_memid);
+  _mi_page_map = NULL;
+  mi_page_map_count = 0;
+  mi_page_map_memid = _mi_memid_none();
+  mi_page_map_max_address = NULL;
+  mi_atomic_store_release(&mi_page_map_commit, 0);
+}
+

 #define MI_PAGE_MAP_ENTRIES_PER_CBIT  (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
@@ -237,19 +275,24 @@ static mi_page_t** mi_page_map_ensure_committed(size_t idx) {

 static mi_page_t** mi_page_map_ensure_at(size_t idx) {
   mi_page_t** sub = mi_page_map_ensure_committed(idx);
-  if mi_unlikely(sub == NULL) {
+  if mi_unlikely(sub == NULL || idx == 0 /* low addresses */) {
     // sub map not yet allocated, alloc now
     mi_memid_t memid;
-    sub = (mi_page_t**)_mi_os_alloc(MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), &memid);
-    mi_page_t** expect = NULL;
-    if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, ((_Atomic(mi_page_t**)*)&_mi_page_map[idx]), &expect, sub)) {
-      // another thread already allocated it.. free and continue
-      _mi_os_free(sub, MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), memid);
-      sub = expect;
-      mi_assert_internal(sub!=NULL);
-    }
+    mi_page_t** expect = sub;
+    const size_t submap_size = MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*);
+    sub = (mi_page_t**)_mi_os_alloc(submap_size, &memid);
     if (sub == NULL) {
       _mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
+      return NULL;
+    }
+    if (!memid.initially_zero) {
+      _mi_memzero_aligned(sub, submap_size);
+    }
+    if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, ((_Atomic(mi_page_t**)*)&_mi_page_map[idx]), &expect, sub)) {
+      // another thread already allocated it.. free and continue
+      _mi_os_free(sub, submap_size, memid);
+      sub = expect;
+      mi_assert_internal(sub!=NULL);
     }
   }
   return sub;
diff --git a/src/page.c b/src/page.c
index 4b66841b..0d8e4e12 100644
--- a/src/page.c
+++ b/src/page.c
@@ -396,6 +396,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq) {
   // and free it
   mi_heap_t* heap = page->heap;
   mi_heap_stat_decrease(heap, page_bins[mi_page_bin(page)], 1);
+  mi_heap_stat_decrease(heap, pages, 1);
   mi_page_set_heap(page,NULL);
   _mi_arenas_page_free(page);
   _mi_arenas_collect(false, false, heap->tld);  // allow purging
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index 6818dd84..db38c3a1 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -32,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #if defined(__linux__)
   #include
   #include   // THP disable, PR_SET_VMA
-  #if !defined(PR_SET_VMA)
+  #if defined(__GLIBC__) && !defined(PR_SET_VMA)
   #include
   #endif
   #if defined(__GLIBC__)
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 780cae18..992e8eea 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -647,18 +647,16 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
 //----------------------------------------------------------------

 #if MI_WIN_USE_FIXED_TLS==1
-mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*);  // use 2nd slot by default
+mi_decl_cache_align size_t _mi_win_tls_offset = 0;
 #endif

-static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
-  MI_UNUSED(reserved);
-  MI_UNUSED(module);
+static void mi_win_tls_init(DWORD reason) {
   #if MI_HAS_TLS_SLOT >= 2  // we must initialize the TLS slot before any allocation
   #if MI_WIN_USE_FIXED_TLS==1
-  if (reason==DLL_PROCESS_ATTACH) {
-    const DWORD tls_slot = TlsAlloc();
-    if (tls_slot == TLS_OUT_OF_INDEXES) {
-      _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n");
+  if (reason==DLL_PROCESS_ATTACH && _mi_win_tls_offset == 0) {
+    const DWORD tls_slot = TlsAlloc();  // usually returns slot 1
+    if (tls_slot == TLS_OUT_OF_INDEXES) {
+      _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n");
     }
     _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*);
   }
@@ -672,7 +670,15 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
     mi_assert_internal(p == (void*)&_mi_heap_empty);
     #endif
   }
+  #else
+  MI_UNUSED(reason);
   #endif
+}
+
+static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
+  MI_UNUSED(reserved);
+  MI_UNUSED(module);
+  mi_win_tls_init(reason);
   if (reason==DLL_PROCESS_ATTACH) {
     _mi_process_load();
   }
@@ -834,11 +840,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
 #endif
 mi_decl_export void _mi_redirect_entry(DWORD reason) {
   // called on redirection; careful as this may be called before DllMain
-  #if MI_HAS_TLS_SLOT >= 2  // we must initialize the TLS slot before any allocation
-  if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) {
-    _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty);
-  }
-  #endif
+  mi_win_tls_init(reason);
   if (reason == DLL_PROCESS_ATTACH) {
     mi_redirected = true;
   }
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 201dc7e1..3e47874e 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -43,7 +43,7 @@ int main() {
   // corrupt_free();
   // block_overflow1();
   // block_overflow2();
-  // test_canary_leak();
+  test_canary_leak();
   // test_aslr();
   // invalid_free();
   // test_reserved();