diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 119b7b93..d4ec8bb7 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -63,6 +63,7 @@ void       _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
 void       _mi_warning_message(const char* fmt, ...);
 void       _mi_verbose_message(const char* fmt, ...);
 void       _mi_trace_message(const char* fmt, ...);
+void       _mi_output_message(const char* fmt, ...);
 void       _mi_options_init(void);
 long       _mi_option_get_fast(mi_option_t option);
 void       _mi_error_message(int err, const char* fmt, ...);
diff --git a/src/arena.c b/src/arena.c
index 632c7a2a..424a9c70 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -429,15 +429,7 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t
   return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld);
 }
 
-static uint8_t* xmi_arena_page_allocated_area(mi_page_t* page, size_t* psize) {
-  // todo: record real allocated size instead of trying to recalculate?
-  size_t page_size;
-  uint8_t* const pstart = mi_page_area(page, &page_size);
-  const size_t diff = pstart - (uint8_t*)page;
-  const size_t size = _mi_align_up(page_size + diff, MI_ARENA_BLOCK_SIZE);
-  if (psize != NULL) { *psize = size; }
-  return (uint8_t*)page;
-}
+
 
 /* -----------------------------------------------------------
   Arena page allocation
@@ -445,6 +437,7 @@ static uint8_t* xmi_arena_page_allocated_area(mi_page_t* page, size_t* psize) {
 
 static mi_page_t* mi_arena_page_try_find_abandoned(size_t block_count, size_t block_size, mi_arena_id_t req_arena_id, mi_tld_t* tld)
 {
+  MI_UNUSED(block_count);
   const size_t bin = _mi_bin(block_size); 
   mi_assert_internal(bin < MI_BIN_COUNT);
 
@@ -693,7 +686,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     }
     mi_assert_internal(block_idx < arena->block_count);
     mi_assert_internal(block_idx >= mi_arena_info_blocks());
-    if (block_idx <= mi_arena_info_blocks() || block_idx > arena->block_count) {
+    if (block_idx < mi_arena_info_blocks() || block_idx > arena->block_count) {
       _mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
       return;
     }
@@ -926,8 +919,8 @@ static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf) {
   return bit_set_count;
 }
 
-static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_t* bitmap) {
-  _mi_verbose_message("%s%s:\n", prefix, header);
+static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_t* bitmap, bool invert) {
+  _mi_output_message("%s%s:\n", prefix, header);
   size_t bit_count = 0;
   size_t bit_set_count = 0;
   for (int i = 0; i < MI_BFIELD_BITS && bit_count < block_count; i++) {
@@ -935,7 +928,11 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
     mi_bitmap_chunk_t* chunk = &bitmap->chunks[i];
     for (int j = 0, k = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) {
       if (bit_count < block_count) {
-        bit_set_count += mi_debug_show_bfield(chunk->bfields[j], buf + k);
+        mi_bfield_t bfield = chunk->bfields[j];
+        if (invert) bfield = ~bfield;
+        size_t xcount = mi_debug_show_bfield(bfield, buf + k);
+        if (invert) xcount = MI_BFIELD_BITS - xcount;
+        bit_set_count += xcount;
         k += MI_BFIELD_BITS;
         buf[k++] = ' ';
       }
@@ -946,9 +943,9 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
       bit_count += MI_BFIELD_BITS;      
     }
     
-    _mi_verbose_message("%s  %s\n", prefix, buf);
+    _mi_output_message("%s  %s\n", prefix, buf);
   }
-  _mi_verbose_message("%s  total ('x'): %zu\n", prefix, bit_set_count);
+  _mi_output_message("%s  total ('x'): %zu\n", prefix, bit_set_count);
   return bit_set_count;
 }
 
@@ -963,19 +960,19 @@ void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge)
     mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
     if (arena == NULL) break;
     block_total += arena->block_count;
-    _mi_verbose_message("arena %zu: %zu blocks (%zu MiB)%s\n", i, arena->block_count, mi_size_of_blocks(arena->block_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""));
+    _mi_output_message("arena %zu: %zu blocks (%zu MiB)%s\n", i, arena->block_count, mi_size_of_blocks(arena->block_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""));
     if (show_inuse) {
-      free_total += mi_debug_show_bitmap("  ", "free blocks", arena->block_count, &arena->blocks_free);
+      free_total += mi_debug_show_bitmap("  ", "in-use blocks", arena->block_count, &arena->blocks_free, true);
     }
-    mi_debug_show_bitmap("  ", "committed blocks", arena->block_count, &arena->blocks_committed);
+    mi_debug_show_bitmap("  ", "committed blocks", arena->block_count, &arena->blocks_committed, false);
     // todo: abandoned blocks
     if (show_purge) {
-      purge_total += mi_debug_show_bitmap("  ", "purgeable blocks", arena->block_count, &arena->blocks_purge);
+      purge_total += mi_debug_show_bitmap("  ", "purgeable blocks", arena->block_count, &arena->blocks_purge, false);
     }
   }
-  if (show_inuse)     _mi_verbose_message("total inuse blocks    : %zu\n", block_total - free_total);
+  if (show_inuse)     _mi_output_message("total inuse blocks    : %zu\n", block_total - free_total);
   // if (show_abandoned) _mi_verbose_message("total abandoned blocks: %zu\n", abandoned_total);
-  if (show_purge)     _mi_verbose_message("total purgeable blocks: %zu\n", purge_total);
+  if (show_purge)     _mi_output_message("total purgeable blocks: %zu\n", purge_total);
 }
 
 
diff --git a/src/options.c b/src/options.c
index 2eaf29a3..8cb0d216 100644
--- a/src/options.c
+++ b/src/options.c
@@ -438,6 +438,13 @@ static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix
   }
 }
 
+void _mi_output_message(const char* fmt, ...) {  // printf-style message helper; unlike `_mi_trace_message` below it does not itself test the verbose option before printing
+  va_list args;
+  va_start(args, fmt);  // start reading the variadic arguments that follow `fmt`
+  mi_vfprintf(NULL, NULL, NULL, fmt, args);  // all three leading arguments are NULL; presumably (out, arg, prefix), i.e. default output and no prefix -- TODO confirm against `mi_vfprintf`
+  va_end(args);  // pairs with the va_start above
+}
+
 void _mi_trace_message(const char* fmt, ...) {
   if (mi_option_get(mi_option_verbose) <= 1) return;  // only with verbose level 2 or higher
   va_list args;
diff --git a/src/page-map.c b/src/page-map.c
index e803a367..f52fab10 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 mi_decl_cache_align signed char* _mi_page_map = NULL;
 static bool        mi_page_map_all_committed = false;
-static size_t      mi_page_map_size_per_commit_bit = MI_ARENA_BLOCK_SIZE;
+static size_t      mi_page_map_entries_per_commit_bit = MI_ARENA_BLOCK_SIZE;
 static mi_memid_t  mi_page_map_memid;
 static mi_bitmap_t mi_page_map_commit;
 
@@ -22,7 +22,7 @@ static bool mi_page_map_init(void) {
   //                    64 KiB for 4 GiB address space (on 32-bit)
   const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_BLOCK_SHIFT));
   
-  mi_page_map_size_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);  
+  mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);  
 
   mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems
   _mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
@@ -42,16 +42,16 @@ static bool mi_page_map_init(void) {
   return true;
 }
 
-static void mi_page_map_ensure_committed(void* p, size_t idx, size_t block_count) {
+static void mi_page_map_ensure_committed(size_t idx, size_t block_count) {
   // is the page map area that contains the page address committed?
   if (!mi_page_map_all_committed) {
-    const size_t commit_bit_count = _mi_divide_up(block_count, mi_page_map_size_per_commit_bit);
-    const size_t commit_bit_idx = idx / mi_page_map_size_per_commit_bit;
-    for (size_t i = 0; i < commit_bit_count; i++) {  // per bit to avoid crossing over bitmap chunks
-      if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, commit_bit_idx + i, 1)) {
+    const size_t commit_bit_idx_lo = idx / mi_page_map_entries_per_commit_bit;
+    const size_t commit_bit_idx_hi = (idx + block_count - 1) / mi_page_map_entries_per_commit_bit;
+    for (size_t i = commit_bit_idx_lo; i <= commit_bit_idx_hi; i++) {  // per bit to avoid crossing over bitmap chunks
+      if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, i, 1)) {
         // this may race, in which case we do multiple commits (which is ok)
-        _mi_os_commit(_mi_page_map + ((commit_bit_idx + i)*mi_page_map_size_per_commit_bit), mi_page_map_size_per_commit_bit, NULL, NULL);
-        mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, commit_bit_idx + i, 1, NULL);
+        _mi_os_commit(_mi_page_map + (i*mi_page_map_entries_per_commit_bit), mi_page_map_entries_per_commit_bit, NULL, NULL);
+        mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, i, 1, NULL);
       }
     }
   }
@@ -71,11 +71,12 @@ void _mi_page_map_register(mi_page_t* page) {
   if mi_unlikely(_mi_page_map == NULL) {
     if (!mi_page_map_init()) return;
   }
+  mi_assert(_mi_page_map!=NULL);
   uint8_t* page_start;
   size_t   block_count;
   const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);
   
-  mi_page_map_ensure_committed(page_start, idx, block_count);
+  mi_page_map_ensure_committed(idx, block_count);
 
   // set the offsets
   for (int i = 0; i < (int)block_count; i++) {
@@ -100,7 +101,7 @@ void _mi_page_map_unregister(mi_page_t* page) {
 
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
   uintptr_t idx = ((uintptr_t)p >> MI_ARENA_BLOCK_SHIFT);
-  if (!mi_page_map_all_committed || mi_bitmap_is_xsetN(MI_BIT_SET, &mi_page_map_commit, idx/mi_page_map_size_per_commit_bit, 1)) {
+  if (!mi_page_map_all_committed || mi_bitmap_is_xsetN(MI_BIT_SET, &mi_page_map_commit, idx/mi_page_map_entries_per_commit_bit, 1)) {
     return (_mi_page_map[idx] != 0);
   }
   else {
diff --git a/src/page-queue.c b/src/page-queue.c
index c6b19985..3fcd700d 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -112,7 +112,7 @@ size_t _mi_bin_size(uint8_t bin) {
 }
 
 // Good size for allocation
-size_t mi_good_size(size_t size) mi_attr_noexcept {
+mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept {
   if (size <= MI_LARGE_MAX_OBJ_SIZE) {
     return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE));
   }
diff --git a/src/page.c b/src/page.c
index b6af4fd0..f8ef641e 100644
--- a/src/page.c
+++ b/src/page.c
@@ -638,7 +638,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
 
   // calculate the extend count
   const size_t bsize = mi_page_block_size(page);
-  size_t extend = page->reserved - page->capacity;
+  size_t extend = (size_t)page->reserved - page->capacity;
   mi_assert_internal(extend > 0);
 
   size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/bsize);
@@ -672,7 +672,7 @@ void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
   mi_assert(page != NULL);
   mi_page_set_heap(page, heap);
   size_t page_size;
-  uint8_t* page_start = mi_page_area(page, &page_size);
+  uint8_t* page_start = mi_page_area(page, &page_size); MI_UNUSED(page_start);
   mi_track_mem_noaccess(page_start,page_size);
   mi_assert_internal(page_size / mi_page_block_size(page) < (1L<<16));
   mi_assert_internal(page->reserved > 0);
diff --git a/src/stats.c b/src/stats.c
index 14489937..9f7a3cf0 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -133,7 +133,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
 // unit == 0: count as decimal
 // unit < 0 : count in binary
 static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) {
-  char buf[32]; buf[0] = 0;
+  char buf[32]; _mi_memzero_var(buf);
   int  len = 32;
   const char* suffix = (unit <= 0 ? " " : "B");
   const int64_t base = (unit == 0 ? 1000 : 1024);
@@ -298,7 +298,7 @@ static void mi_cdecl mi_buffered_out(const char* msg, void* arg) {
 
 static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept {
   // wrap the output function to be line buffered
-  char buf[256];
+  char buf[256]; _mi_memzero_var(buf);
   buffered_t buffer = { out0, arg0, NULL, 0, 255 };
   buffer.buf = buf;
   mi_output_fun* out = &mi_buffered_out;
diff --git a/test/test-stress.c b/test/test-stress.c
index e287cfa7..6327e995 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -48,13 +48,13 @@ static int ITER    = 20;
 static int THREADS = 32;      // more repeatable if THREADS <= #processors
 static int SCALE   = 25;      // scaling factor
 static int ITER    = 50;      // N full iterations destructing and re-creating all threads
-#endif
+#endif  
 
 
 
 #define STRESS                // undefine for leak test
 
-static bool   allow_large_objects = true;     // allow very large objects? (set to `true` if SCALE>100)
+static bool   allow_large_objects = false;     // allow very large objects? (set to `true` if SCALE>100)
 static size_t use_one_size = 0;               // use single object size of `N * sizeof(uintptr_t)`?
 
 static bool   main_participates = false;       // main thread participates as a worker too
@@ -244,7 +244,7 @@ static void test_stress(void) {
     //mi_debug_show_arenas();
     #endif
     #if !defined(NDEBUG) || defined(MI_TSAN)
-    if (true) // (n + 1) % 10 == 0) 
+    if ((n + 1) % 10 == 0) 
       { printf("- iterations left: %3d\n", ITER - (n + 1)); }
     #endif
   }
@@ -320,7 +320,7 @@ int main(int argc, char** argv) {
 
 #ifndef USE_STD_MALLOC
   #ifndef NDEBUG
-  mi_debug_show_arenas(true,true,true);
+  mi_debug_show_arenas(true,true,false);
   mi_collect(true);
   #endif
   // mi_stats_print(NULL);
@@ -345,9 +345,10 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
   thread_entry_fun = fun;
   DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
   HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
+  thandles[0] = GetCurrentThread(); // avoid lint warning
   const size_t start = (main_participates ? 1 : 0);
   for (size_t i = start; i < nthreads; i++) {
-    thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
+    thandles[i] = CreateThread(0, 8*1024L, &thread_entry, (void*)(i), 0, &tids[i]);
   }
   if (main_participates) fun(0); // run the main thread as well
   for (size_t i = start; i < nthreads; i++) {