diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h
index 371cb7ce..e3038d11 100644
--- a/include/mimalloc/bits.h
+++ b/include/mimalloc/bits.h
@@ -123,13 +123,17 @@ typedef int32_t mi_ssize_t;
 
 // use a flat page-map (or a 2-level one)
 #ifndef MI_PAGE_MAP_FLAT
-#if MI_MAX_VABITS <= 40 && !defined(__APPLE__)
+#if MI_MAX_VABITS <= 40 && !MI_SECURE && !defined(__APPLE__)
 #define MI_PAGE_MAP_FLAT 1
 #else
 #define MI_PAGE_MAP_FLAT 0
 #endif
 #endif
 
+#if MI_PAGE_MAP_FLAT && MI_SECURE
+#error should not use MI_PAGE_MAP_FLAT with a secure build
+#endif
+
 
 /* --------------------------------------------------------------------------------
   Builtin's
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 9c5eb362..cfeb6387 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -184,7 +184,7 @@ void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld
 void        _mi_arenas_unsafe_destroy_all(mi_tld_t* tld);
 
 mi_page_t*  _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment);
-void        _mi_arenas_page_free(mi_page_t* page);
+void        _mi_arenas_page_free(mi_page_t* page, mi_tld_t* tld);
 void        _mi_arenas_page_abandon(mi_page_t* page, mi_tld_t* tld);
 void        _mi_arenas_page_unabandon(mi_page_t* page);
 bool        _mi_arenas_page_try_reabandon_to_mapped(mi_page_t* page);
@@ -219,6 +219,7 @@ void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head);
 void        _mi_page_init(mi_heap_t* heap, mi_page_t* page);
 
 bool        _mi_page_queue_is_valid(mi_heap_t* heap, const mi_page_queue_t* pq);
 
+size_t      _mi_page_bin(const mi_page_t* page); // for stats
 size_t      _mi_bin_size(size_t bin); // for stats
 size_t      _mi_bin(size_t size); // for stats
@@ -236,7 +237,9 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t*
 void        _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page);
 
 // "stats.c"
+void        _mi_stats_init(void);
 void        _mi_stats_done(mi_stats_t* stats);
+void        _mi_stats_merge_thread(mi_tld_t* tld);
 void        _mi_stats_merge_from(mi_stats_t* to, mi_stats_t* from);
 mi_msecs_t  _mi_clock_now(void);
 mi_msecs_t  _mi_clock_end(mi_msecs_t start);
@@ -547,16 +550,16 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
 // 2-level page map:
 // double indirection, but low commit and low virtual reserve.
 //
-// the page-map is usually 4 MiB (for 48 bits virtual addresses) and points to sub maps of 64 KiB.
+// the page-map is usually 4 MiB (for 48 bit virtual addresses) and points to sub maps of 64 KiB.
 // the page-map is committed on-demand (in 64 KiB parts) (and sub-maps are committed on-demand as well)
 // one sub page-map = 64 KiB => covers 2^(16-3) * 2^16 = 2^29 = 512 MiB address space
-// the page-map needs 48-(16+13) = 19 bits => 2^19 sub map pointers = 4 MiB size.
+// the page-map needs 48-(16+13) = 19 bits => 2^19 sub map pointers = 2^22 bytes = 4 MiB reserved size.
 #define MI_PAGE_MAP_SUB_SHIFT   (13)
 #define MI_PAGE_MAP_SUB_COUNT   (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT)
 #define MI_PAGE_MAP_SHIFT       (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)
 #define MI_PAGE_MAP_COUNT       (MI_ZU(1) << MI_PAGE_MAP_SHIFT)
 
-extern mi_decl_hidden mi_page_t*** _mi_page_map;
+extern mi_decl_hidden _Atomic(mi_page_t**)* _mi_page_map;
 
 static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
   const size_t u = (size_t)((uintptr_t)p / MI_ARENA_SLICE_SIZE);
@@ -564,16 +567,20 @@ static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
   return (u / MI_PAGE_MAP_SUB_COUNT);
 }
 
+static inline mi_page_t** _mi_page_map_at(size_t idx) {
+  return mi_atomic_load_ptr_relaxed(mi_page_t*, &_mi_page_map[idx]);
+}
+
 static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
   size_t sub_idx;
   const size_t idx = _mi_page_map_index(p, &sub_idx);
-  return _mi_page_map[idx][sub_idx];  // NULL if p==NULL
+  return (_mi_page_map_at(idx))[sub_idx];  // NULL if p==NULL
 }
 
 static inline mi_page_t* _mi_checked_ptr_page(const void* p) {
   size_t sub_idx;
   const size_t idx = _mi_page_map_index(p, &sub_idx);
-  mi_page_t** const sub = _mi_page_map[idx];
+  mi_page_t** const sub = _mi_page_map_at(idx);
   if mi_unlikely(sub == NULL) return NULL;
   return sub[sub_idx];
 }
@@ -583,7 +590,7 @@ static inline mi_page_t* _mi_checked_ptr_page(const void* p) {
 
 static inline mi_page_t* _mi_ptr_page(const void* p) {
   mi_assert_internal(p==NULL || mi_is_in_heap_region(p));
-  #if MI_DEBUG || defined(__APPLE__)
+  #if MI_DEBUG || MI_SECURE || defined(__APPLE__)
   return _mi_checked_ptr_page(p);
   #else
   return _mi_unchecked_ptr_page(p);
@@ -841,7 +848,7 @@ static inline bool _mi_page_unown(mi_page_t* page) {
       _mi_page_free_collect(page, false);  // update used
       if (mi_page_all_free(page)) {        // it may become free just before unowning it
         _mi_arenas_page_unabandon(page);
-        _mi_arenas_page_free(page);
+        _mi_arenas_page_free(page,NULL);
         return true;
       }
       tf_old = mi_atomic_load_relaxed(&page->xthread_free);
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 86aeb07f..71b2c93b 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -50,7 +50,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Define MI_SECURE to enable security mitigations. Level 1 has minimal performance impact,
 // but protects most metadata with guard pages:
-// #define MI_SECURE 1            // guard page around metadata
+// #define MI_SECURE 1            // guard page around metadata; check pointer validity on free
 //
 // Level 2 has more performance impact but protect well against various buffer overflows
 // by surrounding all mimalloc pages with guard pages:
diff --git a/src/arena.c b/src/arena.c
index 95cc8d63..514a0b25 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -578,8 +578,8 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_
 }
 
 // Allocate a fresh page
-static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_count, size_t block_size, size_t block_alignment,
-                                             mi_arena_t* req_arena, size_t tseq, int numa_node, bool commit)
+static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_size, size_t block_alignment,
+                                             mi_arena_t* req_arena, int numa_node, bool commit, mi_tld_t* tld)
 {
   const bool allow_large = (MI_SECURE < 2); // 2 = guard page at end of each arena page
   const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN);
@@ -593,7 +593,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice
       !os_align &&                            // not large alignment
       slice_count <= MI_ARENA_MAX_OBJ_SLICES) // and not too large
   {
-    page = (mi_page_t*)mi_arenas_try_alloc(subproc, slice_count, page_alignment, commit, allow_large, req_arena, tseq, numa_node, &memid);
+    page = (mi_page_t*)mi_arenas_try_alloc(tld->subproc, slice_count, page_alignment, commit, allow_large, req_arena, tld->thread_seq, numa_node, &memid);
     if (page != NULL) {
       mi_assert_internal(mi_bitmap_is_clearN(memid.mem.arena.arena->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count));
       mi_bitmap_set(memid.mem.arena.arena->pages, memid.mem.arena.slice_index);
@@ -701,6 +701,11 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice
 
   // register in the page map
   _mi_page_map_register(page);
+
+  // stats
+  mi_tld_stat_increase(tld, pages, 1);
+  mi_tld_stat_increase(tld, page_bins[_mi_page_bin(page)], 1);
+
   mi_assert_internal(_mi_ptr_page(page)==page);
   mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page);
   mi_assert_internal(mi_page_block_size(page) == block_size);
@@ -724,7 +729,7 @@ static mi_page_t* mi_arenas_page_regular_alloc(mi_heap_t* heap, size_t slice_cou
     const long commit_on_demand = mi_option_get(mi_option_page_commit_on_demand);
     const bool commit = (slice_count <= mi_slice_count_of_size(MI_PAGE_MIN_COMMIT_SIZE) ||  // always commit small pages
                          (commit_on_demand == 2 && _mi_os_has_overcommit()) || (commit_on_demand == 0));
-    page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq, heap->numa_node, commit);
+    page = mi_arenas_page_alloc_fresh(slice_count, block_size, 1, req_arena, heap->numa_node, commit, tld);
    if (page != NULL) {
      mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count);
      _mi_page_init(heap, page);
@@ -746,7 +751,7 @@ static mi_page_t* mi_arenas_page_singleton_alloc(mi_heap_t* heap, size_t block_s
   const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, _mi_os_secure_guard_page_size()) + _mi_os_secure_guard_page_size());
   #endif
 
-  mi_page_t* page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq, heap->numa_node, true /* commit singletons always */);
+  mi_page_t* page = mi_arenas_page_alloc_fresh(slice_count, block_size, block_alignment, req_arena, heap->numa_node, true /* commit singletons always */, tld);
 
   if (page == NULL) return NULL;
   mi_assert(page->reserved == 1);
@@ -785,7 +790,7 @@ mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t bloc
   return page;
 }
 
-void _mi_arenas_page_free(mi_page_t* page) {
+void _mi_arenas_page_free(mi_page_t* page, mi_tld_t* stats_tld /* can be NULL */) {
   mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
   mi_assert_internal(_mi_ptr_page(page)==page);
   mi_assert_internal(mi_page_is_owned(page));
@@ -793,6 +798,15 @@ void _mi_arenas_page_free(mi_page_t* page) {
   mi_assert_internal(mi_page_is_abandoned(page));
   mi_assert_internal(page->next==NULL && page->prev==NULL);
 
+  if (stats_tld != NULL) {
+    mi_tld_stat_decrease(stats_tld, page_bins[_mi_page_bin(page)], 1);
+    mi_tld_stat_decrease(stats_tld, pages, 1);
+  }
+  else {
+    mi_os_stat_decrease(page_bins[_mi_page_bin(page)], 1);
+    mi_os_stat_decrease(pages, 1);
+  }
+
   #if MI_DEBUG>1
   if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) {
     size_t bin = _mi_bin(mi_page_block_size(page));
diff --git a/src/free.c b/src/free.c
index 9ddd1f19..40e81380 100644
--- a/src/free.c
+++ b/src/free.c
@@ -217,7 +217,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t*
       // first remove it from the abandoned pages in the arena (if mapped, this might wait for any readers to finish)
       _mi_arenas_page_unabandon(page);
       // we can free the page directly
-      _mi_arenas_page_free(page);
+      _mi_arenas_page_free(page,NULL);
       return;
     }
 
@@ -300,7 +300,7 @@ static bool mi_page_unown_from_free(mi_page_t* page, mi_block_t* mt_free) {
     _mi_page_free_collect(page,false);  // update used
     if (mi_page_all_free(page)) {       // it may become free just before unowning it
       _mi_arenas_page_unabandon(page);
-      _mi_arenas_page_free(page);
+      _mi_arenas_page_free(page,NULL);
      return true;
    }
    tf_expect = mi_atomic_load_relaxed(&page->xthread_free);
diff --git a/src/heap.c b/src/heap.c
index 971aad68..fb0a6c9d 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -117,11 +117,11 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   // python/cpython#112532: we may be called from a thread that is not the owner of the heap
   // const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id());
 
+  // if (_mi_is_main_thread()) { mi_debug_show_arenas(true, false, false); }
+
   // collect retired pages
   _mi_heap_collect_retired(heap, force);
 
-  // if (_mi_is_main_thread()) { mi_debug_show_arenas(true, false, false); }
-
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
 
@@ -130,9 +130,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   _mi_arenas_collect(collect == MI_FORCE /* force purge? */, collect >= MI_FORCE /* visit all? */, heap->tld);
 
   // merge statistics
-  if (collect <= MI_FORCE) {
-    mi_stats_merge();
-  }
+  if (collect <= MI_FORCE) { _mi_stats_merge_thread(heap->tld); }
 }
 
 void _mi_heap_collect_abandon(mi_heap_t* heap) {
@@ -331,7 +329,6 @@ mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag) {
 static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
   MI_UNUSED(arg1);
   MI_UNUSED(arg2);
-  MI_UNUSED(heap);
   MI_UNUSED(pq);
 
   // ensure no more thread_delayed_free will be added
@@ -363,7 +360,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   page->next = NULL;
   page->prev = NULL;
   mi_page_set_heap(page, NULL);
-  _mi_arenas_page_free(page);
+  _mi_arenas_page_free(page, heap->tld);
 
   return true; // keep going
 }
diff --git a/src/init.c b/src/init.c
index 3b143a37..8336fdb9 100644
--- a/src/init.c
+++ b/src/init.c
@@ -16,7 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
-  MI_ATOMIC_VAR_INIT(0), // xthread_id
+  MI_ATOMIC_VAR_INIT(0),    // xthread_id
   NULL,                     // free
   0,                        // used
   0,                        // capacity
@@ -709,6 +709,7 @@ void mi_process_init(void) mi_attr_noexcept {
 
   _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
   mi_detect_cpu_features();
+  _mi_stats_init();
   _mi_os_init();
   _mi_page_map_init();
   mi_heap_main_init();
@@ -725,7 +726,7 @@ void mi_process_init(void) mi_attr_noexcept {
     _mi_prim_thread_associate_default_heap(NULL);
   #endif
 
-  mi_stats_reset();  // only call stat reset *after* thread init (or the heap tld == NULL)
+  // mi_stats_reset();  // only call stat reset *after* thread init (or the heap tld == NULL)
   mi_track_init();
 
   if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
diff --git a/src/page-map.c b/src/page-map.c
index c8686924..f80e4226 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -78,7 +78,7 @@ void _mi_page_map_unsafe_destroy(void) {
   _mi_page_map = NULL;
   mi_page_map_commit = NULL;
   mi_page_map_max_address = NULL;
-  mi_page_map_memid = _mi_memid_none(); 
+  mi_page_map_memid = _mi_memid_none();
 }
 
 
@@ -173,7 +173,7 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att
 // A 2-level page map
 #define MI_PAGE_MAP_SUB_SIZE  (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
 
-mi_decl_cache_align mi_page_t*** _mi_page_map;
+mi_decl_cache_align _Atomic(mi_page_t**)* _mi_page_map;
 static size_t mi_page_map_count;
 static void* mi_page_map_max_address;
 static mi_memid_t mi_page_map_memid;
@@ -200,10 +200,15 @@ bool _mi_page_map_init(void) {
   mi_assert(mi_page_map_count <= MI_PAGE_MAP_COUNT);
   const size_t os_page_size = _mi_os_page_size();
   const size_t page_map_size = _mi_align_up( mi_page_map_count * sizeof(mi_page_t**), os_page_size);
-  const size_t reserve_size = page_map_size + os_page_size;
+  const size_t submap_size  = MI_PAGE_MAP_SUB_SIZE;
+  const size_t reserve_size = page_map_size + submap_size;
+  #if MI_SECURE
+  const bool commit = true;  // the whole page map is valid and we can reliably check any pointer
+  #else
   const bool commit = page_map_size <= 64*MI_KiB ||
                       mi_option_is_enabled(mi_option_pagemap_commit) || _mi_os_has_overcommit();
-  _mi_page_map = (mi_page_t***)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid);
+  #endif
+  _mi_page_map = (_Atomic(mi_page_t**)*)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid);
   if (_mi_page_map==NULL) {
     _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
     return false;
@@ -220,10 +225,10 @@ bool _mi_page_map_init(void) {
   }
   _mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size);  // we reserved a submap part at the end already
   if (!mi_page_map_memid.initially_committed) {
-    _mi_os_commit(_mi_page_map[0], os_page_size, NULL);  // only first OS page
+    _mi_os_commit(_mi_page_map[0], submap_size, NULL);   // commit full submap (issue #1087)
   }
-  if (!mi_page_map_memid.initially_zero) {  // initialize first addresses with NULL
-    _mi_memzero_aligned(_mi_page_map[0], os_page_size);
+  if (!mi_page_map_memid.initially_zero) {  // initialize low addresses with NULL
+    _mi_memzero_aligned(_mi_page_map[0], submap_size);
   }
 
   mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@@ -233,14 +238,14 @@ bool _mi_page_map_init(void) {
 void _mi_page_map_unsafe_destroy(void) {
   mi_assert_internal(_mi_page_map != NULL);
   if (_mi_page_map == NULL) return;
-  for (size_t idx = 1; idx < mi_page_map_count; idx++) {  // skip entry 0
+  for (size_t idx = 1; idx < mi_page_map_count; idx++) {  // skip entry 0 (as we allocate that submap at the end of the page_map)
     // free all sub-maps
     if (mi_page_map_is_committed(idx, NULL)) {
-      mi_page_t** sub = _mi_page_map[idx];
+      mi_page_t** sub = _mi_page_map_at(idx);
       if (sub != NULL) {
-        mi_memid_t memid = _mi_memid_create_os(sub, MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), true, false, false);
+        mi_memid_t memid = _mi_memid_create_os(sub, MI_PAGE_MAP_SUB_SIZE, true, false, false);
         _mi_os_free(memid.mem.os.base, memid.mem.os.size, memid);
-        _mi_page_map[idx] = NULL;
+        mi_atomic_store_ptr_release(mi_page_t*, &_mi_page_map[idx], NULL);
       }
     }
   }
@@ -270,7 +275,7 @@ static mi_page_t** mi_page_map_ensure_committed(size_t idx) {
     _mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL);
     mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
   }
-  return _mi_page_map[idx];
+  return mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]);  // _mi_page_map_at(idx);
 }
 
 static mi_page_t** mi_page_map_ensure_at(size_t idx) {
@@ -279,7 +284,7 @@ static mi_page_t** mi_page_map_ensure_at(size_t idx) {
     // sub map not yet allocated, alloc now
     mi_memid_t memid;
     mi_page_t** expect = sub;
-    const size_t submap_size = MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*);
+    const size_t submap_size = MI_PAGE_MAP_SUB_SIZE;
     sub = (mi_page_t**)_mi_os_alloc(submap_size, &memid);
     if (sub == NULL) {
       _mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
@@ -288,7 +293,7 @@ static mi_page_t** mi_page_map_ensure_at(size_t idx) {
     if (!memid.initially_zero) {
       _mi_memzero_aligned(sub, submap_size);
     }
-    if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, ((_Atomic(mi_page_t**)*)&_mi_page_map[idx]), &expect, sub)) {
+    if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, &_mi_page_map[idx], &expect, sub)) {
       // another thread already allocated it.. free and continue
       _mi_os_free(sub, submap_size, memid);
       sub = expect;
diff --git a/src/page-queue.c b/src/page-queue.c
index 3e2315cc..91bb0ef9 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -168,8 +168,7 @@ bool _mi_page_queue_is_valid(mi_heap_t* heap, const mi_page_queue_t* pq) {
   return true;
 }
 
-
-static size_t mi_page_bin(const mi_page_t* page) {
+size_t _mi_page_bin(const mi_page_t* page) {
   const size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
   mi_assert_internal(bin <= MI_BIN_FULL);
   return bin;
@@ -177,7 +176,7 @@ static size_t mi_page_bin(const mi_page_t* page) {
 
 static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
   mi_assert_internal(heap!=NULL);
-  const size_t bin = mi_page_bin(page);
+  const size_t bin = _mi_page_bin(page);
   mi_page_queue_t* pq = &heap->pages[bin];
   mi_assert_internal((mi_page_block_size(page) == pq->block_size) ||
                      (mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) ||
diff --git a/src/page.c b/src/page.c
index 0d8e4e12..e9b0e784 100644
--- a/src/page.c
+++ b/src/page.c
@@ -322,9 +322,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
   else if (pq != NULL) {
     mi_page_queue_push(heap, pq, page);
   }
-  mi_heap_stat_increase(heap, pages, 1);
   mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
-  mi_heap_stat_increase(heap, page_bins[mi_page_bin(page)], 1);
   mi_assert_expensive(_mi_page_is_valid(page));
   return page;
 }
@@ -394,12 +392,10 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq) {
   mi_page_queue_remove(pq, page);
 
   // and free it
-  mi_heap_t* heap = page->heap;
-  mi_heap_stat_decrease(heap, page_bins[mi_page_bin(page)], 1);
-  mi_heap_stat_decrease(heap, pages, 1);
+  mi_tld_t* const tld = page->heap->tld;
   mi_page_set_heap(page,NULL);
-  _mi_arenas_page_free(page);
-  _mi_arenas_collect(false, false, heap->tld);  // allow purging
+  _mi_arenas_page_free(page,tld);
+  _mi_arenas_collect(false, false, tld);  // allow purging
 }
 
 #define MI_MAX_RETIRE_SIZE    MI_LARGE_OBJ_SIZE_MAX   // should be less than size for MI_BIN_HUGE
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index dcefdbed..4f00294c 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -18,8 +18,7 @@ terms of the MIT license. A copy of the license can be found in the file
 //---------------------------------------------
 
 #if defined(_MSC_VER)
-#pragma warning(disable:28159) // don't use GetVersion
-#pragma warning(disable:4996)  // don't use GetVersion
+#pragma warning(disable:4996)  // don't use GetVersionExW
 #endif
 
 static DWORD win_major_version = 6;
@@ -72,6 +71,8 @@ static PGetNumaProcessorNode pGetNumaProcessorNode = NULL;
 // Available after Windows XP
 typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes );
 
+typedef BOOL (__stdcall* PGetVersionExW)(LPOSVERSIONINFOW lpVersionInformation);
+
 
 //---------------------------------------------
 // Enable large page support dynamically (if possible)
@@ -126,14 +127,9 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   config->has_overcommit = false;
   config->has_partial_free = false;
   config->has_virtual_reserve = true;
-  // windows version
-  OSVERSIONINFOW version; _mi_memzero_var(version);
-  if (GetVersionExW(&version)) {
-    win_major_version = version.dwMajorVersion;
-    win_minor_version = version.dwMinorVersion;
-  }
+
   // get the page size
-  SYSTEM_INFO si;
+  SYSTEM_INFO si; _mi_memzero_var(si);
   GetSystemInfo(&si);
   if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
   if (si.dwAllocationGranularity > 0) {
@@ -147,8 +143,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   }
 
   // get the VirtualAlloc2 function
-  HINSTANCE  hDll;
-  hDll = LoadLibrary(TEXT("kernelbase.dll"));
+  HINSTANCE hDll = LoadLibrary(TEXT("kernelbase.dll"));
   if (hDll != NULL) {
     // use VirtualAlloc2FromApp if possible as it is available to Windows store apps
     pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp");
@@ -178,6 +173,16 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
       }
     }
   }
+  // Get Windows version
+  PGetVersionExW pGetVersionExW = (PGetVersionExW)(void (*)(void))GetProcAddress(hDll, "GetVersionExW");
+  if (pGetVersionExW != NULL) {
+    OSVERSIONINFOW version; _mi_memzero_var(version);
+    version.dwOSVersionInfoSize = sizeof(version);
+    if ((*pGetVersionExW)(&version)) {
+      win_major_version = version.dwMajorVersion;
+      win_minor_version = version.dwMinorVersion;
+    }
+  }
   FreeLibrary(hDll);
 }
 // Enable large/huge OS page support?
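
The prim.c hunk above drops the direct GetVersionExW call (the source of MSVC warnings 28159 and 4996) and resolves the function at runtime through GetProcAddress instead. As a hedged illustration only — this standalone sketch is not mimalloc code, and it resolves the symbol from kernel32.dll where the patch reuses its existing kernelbase.dll handle — the same pattern looks like:

    // sketch: query the Windows version without naming GetVersionExW at compile time
    #include <windows.h>
    #include <stdio.h>

    typedef BOOL (__stdcall* PGetVersionExW)(LPOSVERSIONINFOW lpVersionInformation);

    int main(void) {
      DWORD major = 6, minor = 0;   // conservative defaults if the lookup fails
      HMODULE hmod = GetModuleHandleW(L"kernel32.dll");
      PGetVersionExW pGetVersionExW =
        (hmod == NULL ? NULL : (PGetVersionExW)(void (*)(void))GetProcAddress(hmod, "GetVersionExW"));
      if (pGetVersionExW != NULL) {
        OSVERSIONINFOW version = { 0 };
        version.dwOSVersionInfoSize = sizeof(version);   // must be set or the call fails
        if ((*pGetVersionExW)(&version)) {
          major = version.dwMajorVersion;
          minor = version.dwMinorVersion;
        }
      }
      printf("Windows version: %lu.%lu\n", (unsigned long)major, (unsigned long)minor);
      return 0;
    }

Because the deprecated declaration is never referenced directly (only the string "GetVersionExW" is), the compiler has nothing to warn about; the patch still keeps 4996 disabled for the remainder of the file.
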
diff --git a/src/stats.c b/src/stats.c
index 3b53d5a9..ab4c8d6e 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -362,7 +362,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
   mi_stat_counter_print(&stats->malloc_guarded_count, "guarded", out, arg);
   mi_stat_print(&stats->threads, "threads", -1, out, arg);
   mi_stat_counter_print_avg(&stats->page_searches, "searches", out, arg);
-  _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count());
+  _mi_fprintf(out, arg, "%10s: %5i\n", "numa nodes", _mi_os_numa_node_count());
 
   size_t elapsed;
   size_t user_time;
@@ -373,9 +373,9 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
   size_t peak_commit;
   size_t page_faults;
   mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
-  _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000);
-  _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process",
-              user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults );
+  _mi_fprintf(out, arg, "%10s: %5zu.%03zu s\n", "elapsed", elapsed/1000, elapsed%1000);
+  _mi_fprintf(out, arg, "%10s: user: %zu.%03zu s, system: %zu.%03zu s, faults: %zu, rss: ", "process",
+              user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, page_faults );
   mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s");
   if (peak_commit > 0) {
     _mi_fprintf(out, arg, ", commit: ");
@@ -386,9 +386,15 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
 
 static mi_msecs_t mi_process_start; // = 0
 
+// called on process init
+void _mi_stats_init(void) {
+  if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); };
+}
+
+
 // return thread local stats
 static mi_stats_t* mi_get_tld_stats(void) {
-  return &mi_heap_get_default()->tld->stats;
+  return &_mi_thread_tld()->stats;
 }
 
 void mi_stats_reset(void) mi_attr_noexcept {
@@ -396,10 +402,12 @@ void mi_stats_reset(void) mi_attr_noexcept {
   mi_stats_t* stats = mi_get_tld_stats();
   mi_subproc_t* subproc = _mi_subproc();
   if (stats != &subproc->stats) { _mi_memzero(stats, sizeof(mi_stats_t)); }
   _mi_memzero(&subproc->stats, sizeof(mi_stats_t));
-  if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); };
+  _mi_stats_init();
 }
+
 void _mi_stats_merge_from(mi_stats_t* to, mi_stats_t* from) {
+  mi_assert_internal(to != NULL && from != NULL);
   if (to != from) {
     mi_stats_add(to, from);
     _mi_memzero(from, sizeof(mi_stats_t));
@@ -410,8 +418,13 @@ void _mi_stats_done(mi_stats_t* stats) {  // called from `mi_thread_done`
   _mi_stats_merge_from(&_mi_subproc()->stats, stats);
 }
 
+void _mi_stats_merge_thread(mi_tld_t* tld) {
+  mi_assert_internal(tld != NULL && tld->subproc != NULL);
+  _mi_stats_merge_from( &tld->subproc->stats, &tld->stats );
+}
+
 void mi_stats_merge(void) mi_attr_noexcept {
-  _mi_stats_done( mi_get_tld_stats() );
+  _mi_stats_merge_thread( _mi_thread_tld() );
 }
 
 void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
@@ -519,7 +532,7 @@ static bool mi_heap_buf_expand(mi_heap_buf_t* hbuf) {
     hbuf->buf[hbuf->size-1] = 0;
   }
   if (hbuf->size > SIZE_MAX/2 || !hbuf->can_realloc) return false;
-  const size_t newsize = (hbuf->size == 0 ? 2*MI_KiB : 2*hbuf->size);
+  const size_t newsize = (hbuf->size == 0 ? mi_good_size(12*MI_KiB) : 2*hbuf->size);
   char* const newbuf = (char*)mi_rezalloc(hbuf->buf, newsize);
   if (newbuf == NULL) return false;
   hbuf->buf = newbuf;
@@ -605,6 +618,7 @@ static void mi_heap_buf_print_counter_value(mi_heap_buf_t* hbuf, const char* nam
 #define MI_STAT_COUNTER(stat)  mi_heap_buf_print_counter_value(&hbuf, #stat, &stats->stat);
 
 char* mi_stats_get_json(size_t output_size, char* output_buf) mi_attr_noexcept {
+  mi_stats_merge();
   mi_heap_buf_t hbuf = { NULL, 0, 0, true };
   if (output_size > 0 && output_buf != NULL) {
     _mi_memzero(output_buf, output_size);
diff --git a/test/test-api.c b/test/test-api.c
index 20d85314..fa8fc3cd 100644
--- a/test/test-api.c
+++ b/test/test-api.c
@@ -86,9 +86,14 @@ int main(void) {
   CHECK_BODY("malloc-nomem1") {
     result = (mi_malloc((size_t)PTRDIFF_MAX + (size_t)1) == NULL);
   };
-  CHECK_BODY("malloc-null") {
+  CHECK_BODY("malloc-free-null") {
    mi_free(NULL);
  };
+  #if MI_INTPTR_BITS > 32
+  CHECK_BODY("malloc-free-invalid-low") {
+    mi_free((void*)(MI_ZU(0x0000000003990080)));  // issue #1087
+  };
+  #endif
   CHECK_BODY("calloc-overflow") {
     // use (size_t)&mi_calloc to get some number without triggering compiler warnings
     result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL);
@@ -163,7 +168,7 @@ int main(void) {
     void* p = mi_malloc_aligned(4097,4096);
     size_t usable = mi_usable_size(p);
     result = (usable >= 4097 && usable < 16000);
-    printf("malloc_aligned5: usable size: %zi\n", usable);
+    fprintf(stderr, "malloc_aligned5: usable size: %zi. ", usable);
     mi_free(p);
   };
   /*
diff --git a/test/test-stress.c b/test/test-stress.c
index cd63b05e..a2e2b377 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -374,7 +374,8 @@ int main(int argc, char** argv) {
       mi_free(json);
     }
   #endif
-  mi_stats_print(NULL);
+  mi_collect(true);
+  mi_stats_print(NULL);
   #endif
   //bench_end_program();
   return 0;
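
The page-map changes above are easier to follow in isolation. Below is a hedged standalone model of the 2-level map, assuming the 48-bit address space and 64 KiB arena slices described in the internal.h comments; the names, the static top-level array, and the use of calloc are illustrative stand-ins, not mimalloc's internals:

    // standalone model of a 2-level page map with lazily published sub-maps
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define SLICE_SHIFT  16                              // 64 KiB arena slices
    #define SUB_SHIFT    13                              // 2^13 entries per sub-map
    #define SUB_COUNT    ((size_t)1 << SUB_SHIFT)
    #define MAP_SHIFT    (48 - SUB_SHIFT - SLICE_SHIFT)  // 48-(16+13) = 19 bits
    #define MAP_COUNT    ((size_t)1 << MAP_SHIFT)        // 2^19 pointers = 4 MiB reserved

    typedef struct page_s page_t;
    static _Atomic(page_t**) page_map[MAP_COUNT];        // the real allocator reserves this from the OS

    static size_t map_index(const void* p, size_t* sub_idx) {
      const size_t u = (size_t)((uintptr_t)p >> SLICE_SHIFT);
      *sub_idx = u % SUB_COUNT;
      return u / SUB_COUNT;
    }

    // checked lookup: returns NULL for any address whose sub-map was never
    // published, which is what makes probing an arbitrary pointer safe
    static page_t* checked_ptr_page(const void* p) {
      size_t sub_idx;
      const size_t idx = map_index(p, &sub_idx);
      page_t** sub = atomic_load_explicit(&page_map[idx], memory_order_acquire);
      return (sub == NULL ? NULL : sub[sub_idx]);
    }

    // publish a sub-map exactly once; a losing thread frees its copy and adopts the winner's
    static page_t** map_ensure_at(size_t idx) {
      page_t** sub = atomic_load_explicit(&page_map[idx], memory_order_acquire);
      if (sub != NULL) return sub;
      page_t** fresh = (page_t**)calloc(SUB_COUNT, sizeof(page_t*));  // zero-initialized
      if (fresh == NULL) return NULL;
      page_t** expect = NULL;
      if (!atomic_compare_exchange_strong_explicit(&page_map[idx], &expect, fresh,
                                                   memory_order_acq_rel, memory_order_acquire)) {
        free(fresh);     // another thread won the race
        return expect;
      }
      return fresh;
    }

    int main(void) {
      void* p = (void*)(uintptr_t)0x3990080;   // the low address from issue #1087
      size_t sub_idx;
      const size_t idx = map_index(p, &sub_idx);
      printf("idx=%zu sub_idx=%zu page=%p\n", idx, sub_idx, (void*)checked_ptr_page(p));
      (void)map_ensure_at(idx);                // done on page registration, never on free
      return 0;
    }

For the test address 0x3990080 the top-level index is 0 (it lies in the low 512 MiB that the first sub-map covers), which is exactly why the page-map.c fix commits and zeroes the entire first sub-map instead of only the first OS page: a low invalid pointer then resolves through a fully valid sub-map to NULL and mi_free can reject it safely (issue #1087). Note the model uses an acquire load in checked_ptr_page for a self-contained data-race-free sketch, whereas the patch's hot-path _mi_page_map_at uses a relaxed load.
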