diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj
index 98dee520..9f967d94 100644
--- a/ide/vs2019/mimalloc.vcxproj
+++ b/ide/vs2019/mimalloc.vcxproj
@@ -92,7 +92,7 @@
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>Level4</WarningLevel>
       <Optimization>Disabled</Optimization>
       <SDLCheck>true</SDLCheck>
       <ConformanceMode>true</ConformanceMode>
@@ -138,7 +138,7 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>Level4</WarningLevel>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <ConformanceMode>true</ConformanceMode>
@@ -166,7 +166,7 @@
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
+      <WarningLevel>Level4</WarningLevel>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <ConformanceMode>true</ConformanceMode>
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 6b416b17..cd6a4b6f 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -195,11 +195,11 @@ bool        _mi_page_is_valid(mi_page_t* page);
 /* -----------------------------------------------------------
   Inlined definitions
 ----------------------------------------------------------- */
-#define UNUSED(x)     (void)(x)
+#define MI_UNUSED(x)     (void)(x)
 #if (MI_DEBUG>0)
-#define UNUSED_RELEASE(x)
+#define MI_UNUSED_RELEASE(x)
 #else
-#define UNUSED_RELEASE(x)  UNUSED(x)
+#define MI_UNUSED_RELEASE(x)  MI_UNUSED(x)
 #endif
 
 #define MI_INIT4(x)   x(),x(),x(),x()
@@ -455,7 +455,7 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
 static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
   ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
   mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE);
-  uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT;
+  size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
   mi_assert_internal(idx < segment->slice_entries);
   mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
   mi_slice_t* slice = mi_slice_first(slice0);  // adjust to the block that holds the page data
@@ -653,7 +653,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl
   #ifdef MI_ENCODE_FREELIST
   return (mi_block_t*)mi_ptr_decode(null, block->next, keys);
   #else
-  UNUSED(keys); UNUSED(null);
+  MI_UNUSED(keys); MI_UNUSED(null);
   return (mi_block_t*)block->next;
   #endif
 }
@@ -662,7 +662,7 @@ static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const
   #ifdef MI_ENCODE_FREELIST
   block->next = mi_ptr_encode(null, next, keys);
   #else
-  UNUSED(keys); UNUSED(null);
+  MI_UNUSED(keys); MI_UNUSED(null);
   block->next = (mi_encoded_t)next;
   #endif
 }
@@ -678,7 +678,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t*
   }
   return next;
   #else
-  UNUSED(page);
+  MI_UNUSED(page);
   return mi_block_nextx(page,block,NULL);
   #endif
 }
@@ -687,7 +687,7 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c
   #ifdef MI_ENCODE_FREELIST
   mi_block_set_nextx(page,block,next, page->keys);
   #else
-  UNUSED(page);
+  MI_UNUSED(page);
   mi_block_set_nextx(page,block,next,NULL);
   #endif
 }
@@ -787,7 +787,7 @@ static inline size_t _mi_os_numa_node_count(void) {
 #if defined(_WIN32)
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
-static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
+static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
   // Windows: works on Intel and ARM in both 32- and 64-bit
   return (uintptr_t)NtCurrentTeb();
 }
@@ -808,11 +808,11 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
 #elif defined(__x86_64__)
   __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
 #elif defined(__arm__)
-  void** tcb; UNUSED(ofs);
+  void** tcb; MI_UNUSED(ofs);
   __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
   res = tcb[slot];
 #elif defined(__aarch64__)
-  void** tcb; UNUSED(ofs);
+  void** tcb; MI_UNUSED(ofs);
   #if defined(__APPLE__) // M1, issue #343
   __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
   tcb = (void**)((uintptr_t)tcb & ~0x07UL);  // clear lower 3 bits
@@ -836,11 +836,11 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
 #elif defined(__x86_64__)
   __asm__("movq %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
 #elif defined(__arm__)
-  void** tcb; UNUSED(ofs);
+  void** tcb; MI_UNUSED(ofs);
   __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
   tcb[slot] = value;
 #elif defined(__aarch64__)
-  void** tcb; UNUSED(ofs);
+  void** tcb; MI_UNUSED(ofs);
   #if defined(__APPLE__) // M1, issue #343
   __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
   tcb = (void**)((uintptr_t)tcb & ~0x07UL);  // clear lower 3 bits
@@ -851,7 +851,7 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
 #endif
 }
 
-static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
+static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
 #if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__))
   // on Android, slot 1 is the thread ID (pointer to pthread internal struct)
   return (uintptr_t)mi_tls_slot(1);
@@ -862,7 +862,7 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
 }
 #else
 // otherwise use standard C
-static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
+static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
   return (uintptr_t)&_mi_heap_default;
 }
 #endif
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 5bf779fa..cd12418b 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -17,7 +17,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif 
 
 // Minimal alignment necessary. On most platforms 16 bytes are needed
-// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
+// due to SSE registers for example. This must be at least `sizeof(void*)`
 #ifndef MI_MAX_ALIGN_SIZE
 #define MI_MAX_ALIGN_SIZE  16   // sizeof(max_align_t)
 #endif
@@ -67,6 +67,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_ENCODE_FREELIST  1
 #endif
 
+
 // ------------------------------------------------------
 // Platform specific values
 // ------------------------------------------------------
@@ -83,32 +84,43 @@ terms of the MIT license. A copy of the license can be found in the file
 // or otherwise one might define an intptr_t type that is larger than a pointer...
 // ------------------------------------------------------
 
-#if INTPTR_MAX == INT64_MAX
+#if INTPTR_MAX > INT64_MAX
+# define MI_INTPTR_SHIFT (4)  // assume 128-bit  (as on arm CHERI for example)
+#elif INTPTR_MAX == INT64_MAX
 # define MI_INTPTR_SHIFT (3)
 #elif INTPTR_MAX == INT32_MAX
 # define MI_INTPTR_SHIFT (2)
 #else
-#error platform must be 32 or 64 bits
+#error platform pointers must be 32, 64, or 128 bits
+#endif
+
+#if SIZE_MAX == UINT64_MAX
+# define MI_SIZE_SHIFT (3)
+typedef int64_t  mi_ssize_t;
+#elif SIZE_MAX == UINT32_MAX
+# define MI_SIZE_SHIFT (2)
+typedef int32_t  mi_ssize_t;
+#else
+#error platform objects must be 32 or 64 bits
+#endif
+
+#if (SIZE_MAX/2) > LONG_MAX
+# define MI_ZU(x)  x##ULL
+# define MI_ZI(x)  x##LL
+#else
+# define MI_ZU(x)  x##UL
+# define MI_ZI(x)  x##L
 #endif
 
 #define MI_INTPTR_SIZE  (1<<MI_INTPTR_SHIFT)
 #define MI_INTPTR_BITS  (MI_INTPTR_SIZE*8)
 
-#if SIZE_MAX == UINT64_MAX
-# define MI_SIZE_SHIFT (3)
-#elif SIZE_MAX == UINT32_MAX
-# define MI_SIZE_SHIFT (2)
-#else
-#error platform must be 32 or 64 bits
-#endif
+#define MI_SIZE_SIZE  (1<<MI_SIZE_SHIFT)
+#define MI_SIZE_BITS  (MI_SIZE_SIZE*8)
 
-#define MI_SIZE_SIZE  (1<<MI_INTPTR_SHIFT)
-#define MI_SIZE_BITS  (MI_INTPTR_SIZE*8)
-
-
-#define KiB     ((size_t)1024)
-#define MiB     (KiB*KiB)
-#define GiB     (MiB*KiB)
+#define MI_KiB     (MI_ZU(1024))
+#define MI_MiB     (MI_KiB*MI_KiB)
+#define MI_GiB     (MI_MiB*MI_KiB)
 
 
 // ------------------------------------------------------
@@ -125,23 +137,20 @@ terms of the MIT license. A copy of the license can be found in the file
 
 
 // Derived constants
-#define MI_SEGMENT_SIZE                   (1<<MI_SEGMENT_SHIFT)
+#define MI_SEGMENT_SIZE                   (MI_ZU(1)<<MI_SEGMENT_SHIFT)
 #define MI_SEGMENT_ALIGN                  MI_SEGMENT_SIZE
 #define MI_SEGMENT_MASK                   (MI_SEGMENT_SIZE - 1)
-#define MI_SEGMENT_SLICE_SIZE             (1<< MI_SEGMENT_SLICE_SHIFT)
+#define MI_SEGMENT_SLICE_SIZE             (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT)
 #define MI_SLICES_PER_SEGMENT             (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 128
 
-#define MI_SMALL_PAGE_SIZE                (1<<MI_SMALL_PAGE_SHIFT)
-#define MI_MEDIUM_PAGE_SIZE               (1<<MI_MEDIUM_PAGE_SHIFT)
+#define MI_SMALL_PAGE_SIZE                (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
+#define MI_MEDIUM_PAGE_SIZE               (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
 
 #define MI_SMALL_OBJ_SIZE_MAX             (MI_SMALL_PAGE_SIZE/4)   // 8KiB on 64-bit
-
 #define MI_MEDIUM_OBJ_SIZE_MAX            (MI_MEDIUM_PAGE_SIZE/4)  // 128KiB on 64-bit
 #define MI_MEDIUM_OBJ_WSIZE_MAX           (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)   
-
 #define MI_LARGE_OBJ_SIZE_MAX             (MI_SEGMENT_SIZE/2)      // 32MiB on 64-bit
 #define MI_LARGE_OBJ_WSIZE_MAX            (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
-
 #define MI_HUGE_OBJ_SIZE_MAX              (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE)        // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
 
 // Maximum number of size classes. (spaced exponentially in 12.5% increments)
@@ -162,9 +171,17 @@ terms of the MIT license. A copy of the license can be found in the file
 
 
 
+
+// ------------------------------------------------------
+// Mimalloc pages contain allocated blocks
+// ------------------------------------------------------
+
 // The free lists use encoded next fields
 // (Only actually encodes when MI_ENCODED_FREELIST is defined.)
-typedef uintptr_t mi_encoded_t;
+typedef uintptr_t  mi_encoded_t;
+
+// thread ids
+typedef size_t     mi_threadid_t;
 
 // free lists contain blocks
 typedef struct mi_block_s {
@@ -342,7 +359,7 @@ typedef struct mi_segment_s {
 
   // layout like this to optimize access in `mi_free`
   mi_segment_kind_t kind;
-  _Atomic(uintptr_t) thread_id;          // unique id of the thread owning this segment
+  _Atomic(mi_threadid_t) thread_id;      // unique id of the thread owning this segment
   size_t            slice_entries;       // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
   mi_slice_t        slices[MI_SLICES_PER_SEGMENT];
 } mi_segment_t;
@@ -403,7 +420,7 @@ struct mi_heap_s {
   mi_page_t*            pages_free_direct[MI_PAGES_DIRECT];  // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
   mi_page_queue_t       pages[MI_BIN_FULL + 1];              // queue of pages for each size class (or "bin")
   _Atomic(mi_block_t*)  thread_delayed_free;
-  uintptr_t             thread_id;                           // thread this heap belongs too
+  mi_threadid_t         thread_id;                           // thread this heap belongs to
   uintptr_t             cookie;                              // random cookie to verify pointers (see `_mi_ptr_cookie`)
   uintptr_t             keys[2];                             // two random keys used to encode the `thread_delayed_free` list
   mi_random_ctx_t       random;                              // random number context used for secure allocation
diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c
index 35b95a24..63297c4c 100644
--- a/src/alloc-override-osx.c
+++ b/src/alloc-override-osx.c
@@ -43,43 +43,43 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im
 ------------------------------------------------------ */
 
 static size_t zone_size(malloc_zone_t* zone, const void* p) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   //if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out
   return mi_usable_size(p);
 }
 
 static void* zone_malloc(malloc_zone_t* zone, size_t size) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return mi_malloc(size);
 }
 
 static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return mi_calloc(count, size);
 }
 
 static void* zone_valloc(malloc_zone_t* zone, size_t size) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return mi_malloc_aligned(size, _mi_os_page_size());
 }
 
 static void zone_free(malloc_zone_t* zone, void* p) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   mi_free(p);
 }
 
 static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return mi_realloc(p, newsize);
 }
 
 static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return mi_malloc_aligned(size,alignment);
 }
 
 static void zone_destroy(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   // todo: ignore for now?
 }
 
@@ -100,18 +100,18 @@ static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) {
 }
 
 static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) {
-  UNUSED(zone); UNUSED(size);
+  MI_UNUSED(zone); MI_UNUSED(size);
   mi_collect(false);
   return 0;
 }
 
 static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) {
-  UNUSED(size);
+  MI_UNUSED(size);
   zone_free(zone,p);
 }
 
 static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return mi_is_in_heap_region(p);
 }
 
@@ -126,43 +126,43 @@ static kern_return_t intro_enumerator(task_t task, void* p,
                             vm_range_recorder_t recorder)
 {
   // todo: enumerate all memory
-  UNUSED(task); UNUSED(p); UNUSED(type_mask); UNUSED(zone_address);
-  UNUSED(reader); UNUSED(recorder);
+  MI_UNUSED(task); MI_UNUSED(p); MI_UNUSED(type_mask); MI_UNUSED(zone_address);
+  MI_UNUSED(reader); MI_UNUSED(recorder);
   return KERN_SUCCESS;
 }
 
 static size_t intro_good_size(malloc_zone_t* zone, size_t size) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return mi_good_size(size);
 }
 
 static boolean_t intro_check(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return true;
 }
 
 static void intro_print(malloc_zone_t* zone, boolean_t verbose) {
-  UNUSED(zone); UNUSED(verbose);
+  MI_UNUSED(zone); MI_UNUSED(verbose);
   mi_stats_print(NULL);
 }
 
 static void intro_log(malloc_zone_t* zone, void* p) {
-  UNUSED(zone); UNUSED(p);
+  MI_UNUSED(zone); MI_UNUSED(p);
   // todo?
 }
 
 static void intro_force_lock(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   // todo?
 }
 
 static void intro_force_unlock(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   // todo?
 }
 
 static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   // todo...
   stats->blocks_in_use = 0;
   stats->size_in_use = 0;
@@ -171,7 +171,7 @@ static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
 }
 
 static boolean_t intro_zone_locked(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return false;
 }
 
@@ -261,7 +261,7 @@ mi_decl_externc void _malloc_fork_child(void);
 
 
 static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) {
-  UNUSED(size); UNUSED(flags);
+  MI_UNUSED(size); MI_UNUSED(flags);
   return mi_get_default_zone();
 }
 
@@ -274,12 +274,12 @@ static malloc_zone_t* mi_malloc_default_purgeable_zone(void) {
 }
 
 static void mi_malloc_destroy_zone(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   // nothing.
 }
 
 static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) {
-  UNUSED(task); UNUSED(mr);
+  MI_UNUSED(task); MI_UNUSED(mr);
   if (addresses != NULL) *addresses = NULL;
   if (count != NULL) *count = 0;
   return KERN_SUCCESS;
@@ -290,11 +290,11 @@ static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) {
 }
 
 static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) {  
-  UNUSED(zone); UNUSED(name);
+  MI_UNUSED(zone); MI_UNUSED(name);
 }
 
 static int mi_malloc_jumpstart(uintptr_t cookie) {
-  UNUSED(cookie);
+  MI_UNUSED(cookie);
   return 1; // or 0 for no error?
 }
 
@@ -309,37 +309,37 @@ static void mi__malloc_fork_child(void) {
 }
 
 static void mi_malloc_printf(const char* fmt, ...) {
-  UNUSED(fmt);
+  MI_UNUSED(fmt);
 }
 
 static bool zone_check(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
   return true;
 }
 
 static malloc_zone_t* zone_from_ptr(const void* p) {
-  UNUSED(p);
+  MI_UNUSED(p);
   return mi_get_default_zone();
 }
 
 static void zone_log(malloc_zone_t* zone, void* p) {
-  UNUSED(zone); UNUSED(p);
+  MI_UNUSED(zone); MI_UNUSED(p);
 }
 
 static void zone_print(malloc_zone_t* zone, bool b) {
-  UNUSED(zone); UNUSED(b);
+  MI_UNUSED(zone); MI_UNUSED(b);
 }
 
 static void zone_print_ptr_info(void* p) {
-  UNUSED(p);
+  MI_UNUSED(p);
 }
 
 static void zone_register(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
 }
 
 static void zone_unregister(malloc_zone_t* zone) {
-  UNUSED(zone);
+  MI_UNUSED(zone);
 }
 
 // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
diff --git a/src/alloc-override.c b/src/alloc-override.c
index fa04b460..42fecbb3 100644
--- a/src/alloc-override.c
+++ b/src/alloc-override.c
@@ -147,8 +147,8 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
     void* operator new(std::size_t n) noexcept(false)   MI_FORWARD1(mi_new,n)
     void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n)
 
-    void* operator new  (std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); }
-    void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); }
+    void* operator new  (std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); }
+    void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); }
 
     #if (__cplusplus >= 201402L || _MSC_VER >= 1916)
     void operator delete  (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n)
@@ -187,21 +187,21 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
   #if (MI_INTPTR_SIZE==8)
     void* _Znwm(size_t n)                             MI_FORWARD1(mi_new,n)  // new 64-bit
     void* _Znam(size_t n)                             MI_FORWARD1(mi_new,n)  // new[] 64-bit
-    void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); }
-    void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); }     
+    void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
+    void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }     
     void* _ZnwmSt11align_val_t(size_t n, size_t al)   MI_FORWARD2(mi_new_aligned, n, al)
     void* _ZnamSt11align_val_t(size_t n, size_t al)   MI_FORWARD2(mi_new_aligned, n, al)
-    void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
-    void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
+    void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
+    void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
   #elif (MI_INTPTR_SIZE==4)
     void* _Znwj(size_t n)                             MI_FORWARD1(mi_new,n)  // new 64-bit
     void* _Znaj(size_t n)                             MI_FORWARD1(mi_new,n)  // new[] 64-bit
-    void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); }
-    void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); }   
+    void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
+    void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }   
     void* _ZnwjSt11align_val_t(size_t n, size_t al)   MI_FORWARD2(mi_new_aligned, n, al)
     void* _ZnajSt11align_val_t(size_t n, size_t al)   MI_FORWARD2(mi_new_aligned, n, al)
-    void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
-    void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
+    void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
+    void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
   #else
     #error "define overloads for new/delete for this platform (just for performance, can be skipped)"
   #endif
diff --git a/src/alloc.c b/src/alloc.c
index ecff9fe8..59b226c1 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -123,7 +123,7 @@ extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
 void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
   // note: we need to initialize the whole usable block size to zero, not just the requested size,
   // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
-  UNUSED(size);
+  MI_UNUSED(size);
   mi_assert_internal(p != NULL);
   mi_assert_internal(mi_usable_size(p) >= size); // size can be zero
   mi_assert_internal(_mi_ptr_page(p)==page);
@@ -205,8 +205,8 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block
 }
 #else
 static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
-  UNUSED(page);
-  UNUSED(block);
+  MI_UNUSED(page);
+  MI_UNUSED(block);
   return false;
 }
 #endif
@@ -278,19 +278,19 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co
 }
 #else
 static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
-  UNUSED(page);
-  UNUSED(block);
+  MI_UNUSED(page);
+  MI_UNUSED(block);
 }
 
 static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
-  UNUSED(block);
+  MI_UNUSED(block);
   return mi_page_usable_block_size(page);
 }
 
 static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
-  UNUSED(page);
-  UNUSED(block);
-  UNUSED(min_size);
+  MI_UNUSED(page);
+  MI_UNUSED(block);
+  MI_UNUSED(min_size);
 }
 #endif
 
@@ -298,7 +298,7 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co
 #if (MI_STAT>0)
 static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
 #if (MI_STAT < 2)  
-  UNUSED(block);
+  MI_UNUSED(block);
 #endif
   mi_heap_t* const heap = mi_heap_get_default();
   const size_t bsize = mi_page_usable_block_size(page);  
@@ -315,7 +315,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
 }
 #else
 static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
-  UNUSED(page); UNUSED(block);
+  MI_UNUSED(page); MI_UNUSED(block);
 }
 #endif
 
@@ -333,7 +333,7 @@ static void mi_stat_huge_free(const mi_page_t* page) {
 }
 #else
 static void mi_stat_huge_free(const mi_page_t* page) {
-  UNUSED(page);
+  MI_UNUSED(page);
 }
 #endif
 
@@ -447,7 +447,7 @@ static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool l
 // (and secure mode) if this was a valid pointer.
 static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) 
 {
-  UNUSED(msg);
+  MI_UNUSED(msg);
 #if (MI_DEBUG>0)
   if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) {
     _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
@@ -482,7 +482,7 @@ void mi_free(void* p) mi_attr_noexcept
   const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
   if (mi_unlikely(segment == NULL)) return; 
 
-  const uintptr_t tid = _mi_thread_id();
+  const mi_threadid_t tid = _mi_thread_id();
   mi_page_t* const page = _mi_segment_page_of(segment, p);
 
   if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) {  // the thread id matches and it is not a full page, nor has aligned blocks
@@ -574,19 +574,19 @@ void* _mi_externs[] = {
 // ------------------------------------------------------
 
 void mi_free_size(void* p, size_t size) mi_attr_noexcept {
-  UNUSED_RELEASE(size);
+  MI_UNUSED_RELEASE(size);
   mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size"));
   mi_free(p);
 }
 
 void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept {
-  UNUSED_RELEASE(alignment);
+  MI_UNUSED_RELEASE(alignment);
   mi_assert(((uintptr_t)p % alignment) == 0);
   mi_free_size(p,size);
 }
 
 void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
-  UNUSED_RELEASE(alignment);
+  MI_UNUSED_RELEASE(alignment);
   mi_assert(((uintptr_t)p % alignment) == 0);
   mi_free(p);
 }
diff --git a/src/arena.c b/src/arena.c
index 3072dbae..6b1e951f 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -62,7 +62,7 @@ typedef struct mi_arena_s {
   bool     is_zero_init;                  // is the arena zero initialized?
   bool     allow_decommit;                // is decommit allowed? if true, is_large should be false and blocks_committed != NULL
   bool     is_large;                      // large- or huge OS pages (always committed)
-  _Atomic(uintptr_t) search_idx;          // optimization to start the search for free blocks
+  _Atomic(size_t) search_idx;             // optimization to start the search for free blocks
   mi_bitmap_field_t* blocks_dirty;        // are the blocks potentially non-zero?
   mi_bitmap_field_t* blocks_committed;    // are the blocks committed? (can be NULL for memory that cannot be decommitted)
   mi_bitmap_field_t  blocks_inuse[1];     // in-place bitmap of in-use blocks (of size `field_count`)
@@ -71,7 +71,7 @@ typedef struct mi_arena_s {
 
 // The available arenas
 static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
-static mi_decl_cache_align _Atomic(uintptr_t)   mi_arena_count; // = 0
+static mi_decl_cache_align _Atomic(size_t)      mi_arena_count; // = 0
 
 
 /* -----------------------------------------------------------
@@ -151,7 +151,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren
 
 static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
 {  
-  UNUSED_RELEASE(alignment);
+  MI_UNUSED_RELEASE(alignment);
   mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
   const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);  
   const size_t bcount = mi_block_count_of_size(size);
@@ -286,7 +286,7 @@ static bool mi_arena_add(mi_arena_t* arena) {
   mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
   mi_assert_internal(arena->block_count > 0);
 
-  uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
+  size_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
   if (i >= MI_MAX_ARENAS) {
     mi_atomic_decrement_acq_rel(&mi_arena_count);
     return false;
@@ -437,7 +437,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
 }
 
 int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
-  UNUSED(max_secs);
+  MI_UNUSED(max_secs);
   _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
   if (pages_reserved != NULL) *pages_reserved = 0;
   int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
diff --git a/src/bitmap.c b/src/bitmap.c
index 50fd4742..af6de0a1 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 /* ----------------------------------------------------------------------------
 Concurrent bitmap that can set/reset sequences of bits atomically,
-represeted as an array of fields where each field is a machine word (`uintptr_t`)
+represented as an array of fields where each field is a machine word (`size_t`)
 
 There are two api's; the standard one cannot have sequences that cross
 between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
@@ -26,11 +26,12 @@ between the fields. (This is used in arena allocation)
 ----------------------------------------------------------- */
 
 // The bit mask for a given number of blocks at a specified bit index.
-static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
+static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
   mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
   mi_assert_internal(count > 0);
   if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
-  return ((((uintptr_t)1 << count) - 1) << bitidx);
+  if (count == 0) return 0;
+  return ((((size_t)1 << count) - 1) << bitidx);
 }
 
 
@@ -45,27 +46,27 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
   mi_assert_internal(bitmap_idx != NULL);
   mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
   mi_assert_internal(count > 0);
-  _Atomic(uintptr_t)* field = &bitmap[idx];
-  uintptr_t map  = mi_atomic_load_relaxed(field);
+  mi_bitmap_field_t* field = &bitmap[idx];
+  size_t map  = mi_atomic_load_relaxed(field);
   if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
 
   // search for 0-bit sequence of length count
-  const uintptr_t mask = mi_bitmap_mask_(count, 0);
-  const size_t    bitidx_max = MI_BITMAP_FIELD_BITS - count;
+  const size_t mask = mi_bitmap_mask_(count, 0);
+  const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
 
 #ifdef MI_HAVE_FAST_BITSCAN
   size_t bitidx = mi_ctz(~map);    // quickly find the first zero bit if possible
 #else
   size_t bitidx = 0;               // otherwise start at 0
 #endif
-  uintptr_t m = (mask << bitidx);     // invariant: m == mask shifted by bitidx
+  size_t m = (mask << bitidx);     // invariant: m == mask shifted by bitidx
 
   // scan linearly for a free range of zero bits
   while (bitidx <= bitidx_max) {
-    const uintptr_t mapm = map & m;
+    const size_t mapm = map & m;
     if (mapm == 0) {  // are the mask bits free at bitidx?
       mi_assert_internal((m >> bitidx) == mask); // no overflow?
-      const uintptr_t newmap = map | m;
+      const size_t newmap = map | m;
       mi_assert_internal((newmap^map) >> bitidx == mask);
       if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) {  // TODO: use strong cas here?
         // no success, another thread claimed concurrently.. keep going (with updated `map`)
@@ -120,10 +121,10 @@ bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, c
 bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
   const size_t idx = mi_bitmap_index_field(bitmap_idx);
   const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
-  const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
-  mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
+  const size_t mask = mi_bitmap_mask_(count, bitidx);  // `count` one-bits starting at bit `bitidx` of the field
+  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);  // `bitmap_fields` is only read by the assertion
   // mi_assert_internal((bitmap[idx] & mask) == mask);
-  uintptr_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
+  size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);  // atomically clear the range; `prev` is used as the field value before the update
   return ((prev & mask) == mask);
 }
 
@@ -133,10 +134,10 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count,
 bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
   const size_t idx = mi_bitmap_index_field(bitmap_idx);
   const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
-  const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
-  mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
+  const size_t mask = mi_bitmap_mask_(count, bitidx);  // `count` one-bits starting at bit `bitidx` of the field
+  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);  // `bitmap_fields` is only read by the assertion
   //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
-  uintptr_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
+  size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);  // atomically set the range; `prev` is used as the field value before the update
   if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
   return ((prev & mask) == 0);
 }
@@ -145,9 +146,9 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi
 static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
   const size_t idx = mi_bitmap_index_field(bitmap_idx);
   const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
-  const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
-  mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
-  uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]);
+  const size_t mask = mi_bitmap_mask_(count, bitidx);  // `count` one-bits starting at bit `bitidx` of the field
+  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);  // `bitmap_fields` is only read by the assertion
+  size_t field = mi_atomic_load_relaxed(&bitmap[idx]);  // read-only snapshot of the field; the bitmap is not modified here
   if (any_ones != NULL) *any_ones = ((field & mask) != 0);
   return ((field & mask) == mask);
 }
@@ -175,8 +176,8 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
   mi_assert_internal(bitmap_idx != NULL);
   
   // check initial trailing zeros
-  _Atomic(uintptr_t)* field = &bitmap[idx];
-  uintptr_t map = mi_atomic_load_relaxed(field);  
+  mi_bitmap_field_t* field = &bitmap[idx];
+  size_t map = mi_atomic_load_relaxed(field);  
   const size_t initial = mi_clz(map);  // count of initial zeros starting at idx
   mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
   if (initial == 0)     return false;
@@ -185,11 +186,11 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
 
   // scan ahead
   size_t found = initial;
-  uintptr_t mask = 0;     // mask bits for the final field
+  size_t mask = 0;     // mask bits for the final field
   while(found < count) {
     field++;
     map = mi_atomic_load_relaxed(field);
-    const uintptr_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
+    const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
     mask = mi_bitmap_mask_(mask_bits, 0);
     if ((map & mask) != 0) return false;
     found += mask_bits;
@@ -198,13 +199,13 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
 
   // found range of zeros up to the final field; mask contains mask in the final field
   // now claim it atomically
-  _Atomic(uintptr_t)* const final_field = field;
-  const uintptr_t final_mask = mask;
-  _Atomic(uintptr_t)* const initial_field = &bitmap[idx];
-  const uintptr_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial);
+  mi_bitmap_field_t* const final_field = field;
+  const size_t final_mask = mask;
+  mi_bitmap_field_t* const initial_field = &bitmap[idx];
+  const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial);
 
   // initial field
-  uintptr_t newmap;
+  size_t newmap;
   field = initial_field;
   map = mi_atomic_load_relaxed(field);
   do {
@@ -279,8 +280,8 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm
 }
 
 // Helper for masks across fields; returns the mid count, post_mask may be 0
-static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, uintptr_t* pre_mask, uintptr_t* mid_mask, uintptr_t* post_mask) {
-  UNUSED_RELEASE(bitmap_fields);
+static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
+  MI_UNUSED_RELEASE(bitmap_fields);
   const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
   if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) {
     *pre_mask = mi_bitmap_mask_(count, bitidx);
@@ -307,13 +308,13 @@ static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_
 // Returns `true` if all `count` bits were 1 previously.
 bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
-  uintptr_t pre_mask;
-  uintptr_t mid_mask;
-  uintptr_t post_mask;
+  size_t pre_mask;
+  size_t mid_mask;
+  size_t post_mask;
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);  
   bool all_one = true;
-  _Atomic(uintptr_t)*field = &bitmap[idx];
-  uintptr_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask);
+  mi_bitmap_field_t* field = &bitmap[idx];
+  size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask);
   if ((prev & pre_mask) != pre_mask) all_one = false;
   while(mid_count-- > 0) {
     prev = mi_atomic_and_acq_rel(field++, ~mid_mask);
@@ -330,14 +331,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
 // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
 bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
-  uintptr_t pre_mask;
-  uintptr_t mid_mask;
-  uintptr_t post_mask;
+  size_t pre_mask;
+  size_t mid_mask;
+  size_t post_mask;
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
   bool all_zero = true;
   bool any_zero = false;
-  _Atomic(uintptr_t)*field = &bitmap[idx];
-  uintptr_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
+  _Atomic(size_t)*field = &bitmap[idx];
+  size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
   if ((prev & pre_mask) != 0) all_zero = false;
   if ((prev & pre_mask) != pre_mask) any_zero = true;
   while (mid_count-- > 0) {
@@ -359,14 +360,14 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
 // `any_ones` is `true` if there was at least one bit set to one.
 static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
-  uintptr_t pre_mask;
-  uintptr_t mid_mask;
-  uintptr_t post_mask;
+  size_t pre_mask;
+  size_t mid_mask;
+  size_t post_mask;
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
   bool all_ones = true;
   bool any_ones = false;
-  _Atomic(uintptr_t)* field = &bitmap[idx];
-  uintptr_t prev = mi_atomic_load_relaxed(field++);
+  mi_bitmap_field_t* field = &bitmap[idx];
+  size_t prev = mi_atomic_load_relaxed(field++);
   if ((prev & pre_mask) != pre_mask) all_ones = false;
   if ((prev & pre_mask) != 0) any_ones = true;
   while (mid_count-- > 0) {
diff --git a/src/bitmap.h b/src/bitmap.h
index aae99279..7bd3106c 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 /* ----------------------------------------------------------------------------
 Concurrent bitmap that can set/reset sequences of bits atomically,
-represeted as an array of fields where each field is a machine word (`uintptr_t`)
+represented as an array of fields where each field is a machine word (`size_t`)
 
 There are two api's; the standard one cannot have sequences that cross
 between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
@@ -24,11 +24,11 @@ between the fields. (This is used in arena allocation)
   Bitmap definition
 ----------------------------------------------------------- */
 
-#define MI_BITMAP_FIELD_BITS   (8*MI_INTPTR_SIZE)
-#define MI_BITMAP_FIELD_FULL   (~((uintptr_t)0))   // all bits set
+#define MI_BITMAP_FIELD_BITS   (8*MI_SIZE_SIZE)
+#define MI_BITMAP_FIELD_FULL   (~((size_t)0))   // all bits set
 
-// An atomic bitmap of `uintptr_t` fields
-typedef _Atomic(uintptr_t)  mi_bitmap_field_t;
+// An atomic bitmap of `size_t` fields
+typedef _Atomic(size_t)  mi_bitmap_field_t;
 typedef mi_bitmap_field_t*  mi_bitmap_t;
 
 // A bitmap index is the index of the bit in a bitmap.
diff --git a/src/heap.c b/src/heap.c
index 1831fce8..d7975b0b 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -50,9 +50,9 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
 
 #if MI_DEBUG>=2
 static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
-  UNUSED(arg1);
-  UNUSED(arg2);
-  UNUSED(pq);
+  MI_UNUSED(arg1);
+  MI_UNUSED(arg2);
+  MI_UNUSED(pq);
   mi_assert_internal(mi_page_heap(page) == heap);
   mi_segment_t* segment = _mi_page_segment(page);
   mi_assert_internal(segment->thread_id == heap->thread_id);
@@ -86,8 +86,8 @@ typedef enum mi_collect_e {
 
 
 static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
-  UNUSED(arg2);
-  UNUSED(heap);
+  MI_UNUSED(arg2);
+  MI_UNUSED(heap);
   mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
   mi_collect_t collect = *((mi_collect_t*)arg_collect);
   _mi_page_free_collect(page, collect >= MI_FORCE);
@@ -104,10 +104,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
 }
 
 static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
-  UNUSED(arg1);
-  UNUSED(arg2);
-  UNUSED(heap);
-  UNUSED(pq);
+  MI_UNUSED(arg1);
+  MI_UNUSED(arg2);
+  MI_UNUSED(heap);
+  MI_UNUSED(pq);
   _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
   return true; // don't break
 }
@@ -262,10 +262,10 @@ static void mi_heap_free(mi_heap_t* heap) {
 ----------------------------------------------------------- */
 
 static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
-  UNUSED(arg1);
-  UNUSED(arg2);
-  UNUSED(heap);
-  UNUSED(pq);
+  MI_UNUSED(arg1);
+  MI_UNUSED(arg2);
+  MI_UNUSED(heap);
+  MI_UNUSED(pq);
 
   // ensure no more thread_delayed_free will be added
   _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
@@ -422,8 +422,8 @@ bool mi_heap_contains_block(mi_heap_t* heap, const void* p) {
 
 
 static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) {
-  UNUSED(heap);
-  UNUSED(pq);
+  MI_UNUSED(heap);
+  MI_UNUSED(pq);
   bool* found = (bool*)vfound;
   mi_segment_t* segment = _mi_page_segment(page);
   void* start = _mi_page_start(segment, page, NULL);
@@ -521,8 +521,8 @@ typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_
 
 
 static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
-  UNUSED(heap);
-  UNUSED(pq);
+  MI_UNUSED(heap);
+  MI_UNUSED(pq);
   mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
   mi_heap_area_ex_t xarea;
   const size_t bsize = mi_page_block_size(page);
diff --git a/src/init.c b/src/init.c
index a7e84bf1..20060232 100644
--- a/src/init.c
+++ b/src/init.c
@@ -359,7 +359,7 @@ bool _mi_is_main_thread(void) {
   return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id());
 }
 
-static _Atomic(uintptr_t) thread_count = ATOMIC_VAR_INIT(1);
+static _Atomic(size_t) thread_count = ATOMIC_VAR_INIT(1);
 
 size_t  _mi_current_thread_count(void) {
   return mi_atomic_load_relaxed(&thread_count);
@@ -477,7 +477,7 @@ static void mi_process_load(void) {
   mi_heap_main_init();
   #if defined(MI_TLS_RECURSE_GUARD)
   volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true;
-  UNUSED(dummy);
+  MI_UNUSED(dummy);
   #endif
   os_preloading = false;
   atexit(&mi_process_done);
@@ -536,7 +536,7 @@ void mi_process_init(void) mi_attr_noexcept {
   if (mi_option_is_enabled(mi_option_reserve_os_memory)) {
     long ksize = mi_option_get(mi_option_reserve_os_memory);
     if (ksize > 0) {
-      mi_reserve_os_memory((size_t)ksize*KiB, true /* commit? */, true /* allow large pages? */);
+      mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */);
     }
   }
 }
@@ -575,8 +575,8 @@ static void mi_process_done(void) {
 #if defined(_WIN32) && defined(MI_SHARED_LIB)
   // Windows DLL: easy to hook into process_init and thread_done
   __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
-    UNUSED(reserved);
-    UNUSED(inst);
+    MI_UNUSED(reserved);
+    MI_UNUSED(inst);
     if (reason==DLL_PROCESS_ATTACH) {
       mi_process_load();
     }
diff --git a/src/options.c b/src/options.c
index 859b3871..3fd58018 100644
--- a/src/options.c
+++ b/src/options.c
@@ -19,8 +19,8 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 
 
-static uintptr_t mi_max_error_count   = 16; // stop outputting errors after this
-static uintptr_t mi_max_warning_count = 16; // stop outputting warnings after this
+static size_t mi_max_error_count   = 16; // stop outputting errors after this
+static size_t mi_max_warning_count = 16; // stop outputting warnings after this
 
 static void mi_add_stderr_output(void);
 
@@ -106,7 +106,7 @@ void _mi_options_init(void) {
   mi_add_stderr_output(); // now it safe to use stderr for output
   for(int i = 0; i < _mi_option_last; i++ ) {
     mi_option_t option = (mi_option_t)i;
-    long l = mi_option_get(option); UNUSED(l); // initialize
+    long l = mi_option_get(option); MI_UNUSED(l); // initialize
     if (option != mi_option_verbose) {
       mi_option_desc_t* desc = &options[option];
       _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value);
@@ -116,7 +116,7 @@ void _mi_options_init(void) {
   mi_max_warning_count = mi_option_get(mi_option_max_warnings);
 }
 
-long mi_option_get(mi_option_t option) {
+mi_decl_nodiscard long mi_option_get(mi_option_t option) {
   mi_assert(option >= 0 && option < _mi_option_last);
   mi_option_desc_t* desc = &options[option];
   mi_assert(desc->option == option);  // index should match the option
@@ -142,7 +142,7 @@ void mi_option_set_default(mi_option_t option, long value) {
   }
 }
 
-bool mi_option_is_enabled(mi_option_t option) {
+mi_decl_nodiscard bool mi_option_is_enabled(mi_option_t option) {  // true iff `mi_option_get(option)` is non-zero
   return (mi_option_get(option) != 0);
 }
 
@@ -164,7 +164,7 @@ void mi_option_disable(mi_option_t option) {
 
 
 static void mi_out_stderr(const char* msg, void* arg) {
-  UNUSED(arg);
+  MI_UNUSED(arg);
   #ifdef _WIN32
   // on windows with redirection, the C runtime cannot handle locale dependent output
   // after the main thread closes so we use direct console output.
@@ -179,19 +179,19 @@ static void mi_out_stderr(const char* msg, void* arg) {
 // an output function is registered it is called immediately with
 // the output up to that point.
 #ifndef MI_MAX_DELAY_OUTPUT
-#define MI_MAX_DELAY_OUTPUT ((uintptr_t)(32*1024))
+#define MI_MAX_DELAY_OUTPUT ((size_t)(32*1024))
 #endif
 static char out_buf[MI_MAX_DELAY_OUTPUT+1];
-static _Atomic(uintptr_t) out_len;
+static _Atomic(size_t) out_len;
 
 static void mi_out_buf(const char* msg, void* arg) {
-  UNUSED(arg);
+  MI_UNUSED(arg);
   if (msg==NULL) return;
   if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return;
   size_t n = strlen(msg);
   if (n==0) return;
   // claim space
-  uintptr_t start = mi_atomic_add_acq_rel(&out_len, n);
+  size_t start = mi_atomic_add_acq_rel(&out_len, n);
   if (start >= MI_MAX_DELAY_OUTPUT) return;
   // check bound
   if (start+n >= MI_MAX_DELAY_OUTPUT) {
@@ -254,8 +254,8 @@ static void mi_add_stderr_output() {
 // --------------------------------------------------------
 // Messages, all end up calling `_mi_fputs`.
 // --------------------------------------------------------
-static _Atomic(uintptr_t) error_count;   // = 0;  // when >= max_error_count stop emitting errors
-static _Atomic(uintptr_t) warning_count; // = 0;  // when >= max_warning_count stop emitting warnings
+static _Atomic(size_t) error_count;   // = 0;  // when >= max_error_count stop emitting errors
+static _Atomic(size_t) warning_count; // = 0;  // when >= max_warning_count stop emitting warnings
 
 // When overriding malloc, we may recurse into mi_vfprintf if an allocation
 // inside the C runtime causes another message.
@@ -356,7 +356,7 @@ static mi_error_fun* volatile  mi_error_handler; // = NULL
 static _Atomic(void*) mi_error_arg;     // = NULL
 
 static void mi_error_default(int err) {
-  UNUSED(err);
+  MI_UNUSED(err);
 #if (MI_DEBUG>0) 
   if (err==EFAULT) {
     #ifdef _MSC_VER
@@ -414,9 +414,9 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) {
 
 #ifdef MI_NO_GETENV
 static bool mi_getenv(const char* name, char* result, size_t result_size) {
-  UNUSED(name);
-  UNUSED(result);
-  UNUSED(result_size);
+  MI_UNUSED(name);
+  MI_UNUSED(result);
+  MI_UNUSED(result_size);
   return false;
 }
 #else
@@ -524,9 +524,9 @@ static void mi_option_init(mi_option_desc_t* desc) {
       if (desc->option == mi_option_reserve_os_memory) {
         // this option is interpreted in KiB to prevent overflow of `long`
         if (*end == 'K') { end++; }
-        else if (*end == 'M') { value *= KiB; end++; }
-        else if (*end == 'G') { value *= MiB; end++; }
-        else { value = (value + KiB - 1) / KiB; }
+        else if (*end == 'M') { value *= MI_KiB; end++; }
+        else if (*end == 'G') { value *= MI_MiB; end++; }
+        else { value = (value + MI_KiB - 1) / MI_KiB; }
         if (*end == 'B') { end++; }
       }
       if (*end == 0) {
diff --git a/src/os.c b/src/os.c
index bed61182..31ad4119 100644
--- a/src/os.c
+++ b/src/os.c
@@ -111,11 +111,11 @@ static bool use_large_os_page(size_t size, size_t alignment) {
 // round to a good OS allocation size (bounded by max 12.5% waste)
 size_t _mi_os_good_alloc_size(size_t size) {
   size_t align_size;
-  if (size < 512*KiB) align_size = _mi_os_page_size();
-  else if (size < 2*MiB) align_size = 64*KiB;
-  else if (size < 8*MiB) align_size = 256*KiB;
-  else if (size < 32*MiB) align_size = 1*MiB;
-  else align_size = 4*MiB;
+  if (size < 512*MI_KiB) align_size = _mi_os_page_size();  // smallest tier: round up to the OS page size
+  else if (size < 2*MI_MiB) align_size = 64*MI_KiB;  // granule is 1/8 of this tier's 512KiB lower bound (assuming MI_MiB == 1024*MI_KiB)
+  else if (size < 8*MI_MiB) align_size = 256*MI_KiB;  // 1/8 of the 2MiB lower bound
+  else if (size < 32*MI_MiB) align_size = 1*MI_MiB;  // 1/8 of the 8MiB lower bound
+  else align_size = 4*MI_MiB;  // 1/8 of the 32MiB lower bound
   if (mi_unlikely(size >= (SIZE_MAX - align_size))) return size; // possible overflow?
   return _mi_align_up(size, align_size);
 }
@@ -252,7 +252,7 @@ void _mi_os_init() {
     os_page_size = (size_t)result;
     os_alloc_granularity = os_page_size;
   }
-  large_os_page_size = 2*MiB; // TODO: can we query the OS for this?
+  large_os_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
   os_detect_overcommit();
 }
 #endif
@@ -332,11 +332,11 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
 
 static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
   mi_assert_internal(!(large_only && !allow_large));
-  static _Atomic(uintptr_t) large_page_try_ok; // = 0;
+  static _Atomic(size_t) large_page_try_ok; // = 0;
   void* p = NULL;
   if ((large_only || use_large_os_page(size, try_alignment))
       && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) {
-    uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
+    size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
     if (!large_only && try_ok > 0) {
       // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
       // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
@@ -406,7 +406,7 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
 #else
 #define MI_OS_USE_MMAP
 static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
-  UNUSED(try_alignment);  
+  MI_UNUSED(try_alignment);  
   #if defined(MAP_ALIGNED)  // BSD
   if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) {
     size_t n = mi_bsr(try_alignment);
@@ -477,8 +477,8 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
   #endif
   // huge page allocation
   if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) {
-    static _Atomic(uintptr_t) large_page_try_ok; // = 0;
-    uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
+    static _Atomic(size_t) large_page_try_ok; // = 0;
+    size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
     if (!large_only && try_ok > 0) {
       // If the OS is not configured for large OS pages, or the user does not have
       // enough permission, the `mmap` will always fail (but it might also fail for other reasons).
@@ -497,7 +497,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
       #endif
       #ifdef MAP_HUGE_1GB
       static bool mi_huge_pages_available = true;
-      if ((size % GiB) == 0 && mi_huge_pages_available) {
+      if ((size % MI_GiB) == 0 && mi_huge_pages_available) {
         lflags |= MAP_HUGE_1GB;
       }
       else
@@ -524,7 +524,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
         #endif
         if (large_only) return p;
         if (p == NULL) {
-          mi_atomic_store_release(&large_page_try_ok, (uintptr_t)8);  // on error, don't try again for the next N allocations
+          mi_atomic_store_release(&large_page_try_ok, (size_t)8);  // on error, don't try again for the next N allocations
         }
       }
     }
@@ -585,7 +585,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
 {
   if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL;
   if ((size%MI_SEGMENT_SIZE) != 0) return NULL;
-  if (size > 1*GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096.
+  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096.
   #if (MI_SECURE>0)
   size += MI_SEGMENT_SIZE;        // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
   #endif
@@ -608,7 +608,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
 // no need for mi_os_get_aligned_hint
 #else
 static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
-  UNUSED(try_alignment); UNUSED(size);
+  MI_UNUSED(try_alignment); MI_UNUSED(size);
   return NULL;
 }
 #endif
@@ -735,7 +735,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
 ----------------------------------------------------------- */
 
 void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) {
-  UNUSED(tld_stats);
+  MI_UNUSED(tld_stats);
   mi_stats_t* stats = &_mi_stats_main;
   if (size == 0) return NULL;
   size = _mi_os_good_alloc_size(size);
@@ -744,7 +744,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) {
 }
 
 void  _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) {
-  UNUSED(tld_stats);
+  MI_UNUSED(tld_stats);
   mi_stats_t* stats = &_mi_stats_main;
   if (size == 0 || p == NULL) return;
   size = _mi_os_good_alloc_size(size);
@@ -757,7 +757,7 @@ void  _mi_os_free(void* p, size_t size, mi_stats_t* stats) {
 
 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats)
 {
-  UNUSED(tld_stats);
+  MI_UNUSED(tld_stats);
   if (size == 0) return NULL;
   size = _mi_os_good_alloc_size(size);
   alignment = _mi_align_up(alignment, _mi_os_page_size());
@@ -808,7 +808,7 @@ static void mi_mprotect_hint(int err) {
                         "  > sudo sysctl -w vm.max_map_count=262144\n");
   }
 #else
-  UNUSED(err);
+  MI_UNUSED(err);
 #endif
 }
 
@@ -883,13 +883,13 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
 }
 
 bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
-  UNUSED(tld_stats);
+  MI_UNUSED(tld_stats);
   mi_stats_t* stats = &_mi_stats_main;
   return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats);
 }
 
 bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
-  UNUSED(tld_stats);
+  MI_UNUSED(tld_stats);
   mi_stats_t* stats = &_mi_stats_main;
   bool is_zero;
   return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats);
@@ -930,13 +930,13 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   if (p != start) return false;
 #else
 #if defined(MADV_FREE)
-  static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(MADV_FREE);
+  static _Atomic(size_t) advice = ATOMIC_VAR_INIT(MADV_FREE);
   int oadvice = (int)mi_atomic_load_relaxed(&advice);
   int err;
   while ((err = madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0;  };
   if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) {  
     // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
-    mi_atomic_store_release(&advice, (uintptr_t)MADV_DONTNEED);
+    mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED);
     err = madvise(start, csize, MADV_DONTNEED);
   }
 #elif defined(__wasi__)
@@ -958,7 +958,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
 // pages and reduce swapping while keeping the memory committed.
 // We page align to a conservative area inside the range to reset.
 bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) {
-  UNUSED(tld_stats);
+  MI_UNUSED(tld_stats);
   mi_stats_t* stats = &_mi_stats_main;
   if (mi_option_is_enabled(mi_option_reset_decommits)) {
     return _mi_os_decommit(addr, size, stats);
@@ -969,7 +969,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) {
 }
 
 bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
-  UNUSED(tld_stats);
+  MI_UNUSED(tld_stats);
   mi_stats_t* stats = &_mi_stats_main;
   if (mi_option_is_enabled(mi_option_reset_decommits)) {
     return mi_os_commit_unreset(addr, size, is_zero, stats);  // re-commit it (conservatively!)
@@ -1045,12 +1045,12 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) {
 Support for allocating huge OS pages (1Gib) that are reserved up-front
 and possibly associated with a specific NUMA node. (use `numa_node>=0`)
 -----------------------------------------------------------------------------*/
-#define MI_HUGE_OS_PAGE_SIZE  (GiB)
+#define MI_HUGE_OS_PAGE_SIZE  (MI_GiB)
 
 #if defined(_WIN32) && (MI_INTPTR_SIZE >= 8)
 static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 {
-  mi_assert_internal(size%GiB == 0);
+  mi_assert_internal(size%MI_GiB == 0);
   mi_assert_internal(addr != NULL);
   const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
 
@@ -1091,7 +1091,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
     return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
   }
   #else
-    UNUSED(numa_node);
+    MI_UNUSED(numa_node);
   #endif
   // otherwise use regular virtual alloc on older windows
   return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
@@ -1108,17 +1108,17 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons
 }
 #else
 static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
-  UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags);
+  MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags);
   return 0;
 }
 #endif
 static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
-  mi_assert_internal(size%GiB == 0);
+  mi_assert_internal(size%MI_GiB == 0);
   bool is_large = true;
   void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
   if (p == NULL) return NULL;
   if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
-    uintptr_t numa_mask = (1UL << numa_node);
+    unsigned long numa_mask = (1UL << numa_node);
     // TODO: does `mbind` work correctly for huge OS pages? should we
     // use `set_mempolicy` before calling mmap instead?
     // see: <https://lkml.org/lkml/2017/2/9/875>
@@ -1131,7 +1131,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 }
 #else
 static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
-  UNUSED(addr); UNUSED(size); UNUSED(numa_node);
+  MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node);
   return NULL;
 }
 #endif
@@ -1167,7 +1167,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
 }
 #else
 static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
-  UNUSED(pages);
+  MI_UNUSED(pages);
   if (total_size != NULL) *total_size = 0;
   return NULL;
 }
@@ -1368,7 +1368,7 @@ size_t _mi_os_numa_node_count_get(void) {
 }
 
 int _mi_os_numa_node_get(mi_os_tld_t* tld) {
-  UNUSED(tld);
+  MI_UNUSED(tld);
   size_t numa_count = _mi_os_numa_node_count();
   if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
   // never more than the node count and >= 0
diff --git a/src/page.c b/src/page.c
index 96d581cf..f33ca033 100644
--- a/src/page.c
+++ b/src/page.c
@@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Index a block in a page
 static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) {
-  UNUSED(page);
+  MI_UNUSED(page);
   mi_assert_internal(page != NULL);
   mi_assert_internal(i <= page->reserved);
   return (mi_block_t*)((uint8_t*)page_start + (i * block_size));
@@ -472,7 +472,7 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
 #define MI_MIN_SLICES       (2)
 
 static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) {
-  UNUSED(stats);
+  MI_UNUSED(stats);
   #if (MI_SECURE<=2)
   mi_assert_internal(page->free == NULL);
   mi_assert_internal(page->local_free == NULL);
@@ -530,7 +530,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
 
 static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats)
 {
-  UNUSED(stats);
+  MI_UNUSED(stats);
   #if (MI_SECURE <= 2)
   mi_assert_internal(page->free == NULL);
   mi_assert_internal(page->local_free == NULL);
@@ -571,7 +571,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
 // allocations but this did not speed up any benchmark (due to an
 // extra test in malloc? or cache effects?)
 static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) {
-  UNUSED(tld); 
+  MI_UNUSED(tld); 
   mi_assert_expensive(mi_page_is_valid_init(page));
   #if (MI_SECURE<=2)
   mi_assert(page->free == NULL);
diff --git a/src/region.c b/src/region.c
new file mode 100644
index 00000000..f864f73b
--- /dev/null
+++ b/src/region.c
@@ -0,0 +1,505 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019-2020, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
+and the segment and huge object allocation by mimalloc. There may be multiple
+implementations of this (one could be the identity going directly to the OS,
+another could be a simple cache etc), but the current one uses large "regions".
+In contrast to the rest of mimalloc, the "regions" are shared between threads and
+need to be accessed using atomic operations.
+We need this memory layer between the raw OS calls because of:
+1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
+   to reuse memory effectively.
+2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
+   an OS allocation/free is still (much) too expensive relative to the accesses 
+   in that object :-( (`malloc-large` tests this). This means we need a cheaper 
+   way to reuse memory.
+3. This layer allows for NUMA aware allocation.
+
+Possible issues:
+- (2) can potentially be addressed too with a small cache per thread which is much
+  simpler. Generally though that requires shrinking of huge pages, and may overuse
+  memory per thread. (and is not compatible with `sbrk`).
+- Since the current regions are per-process, we need atomic operations to
+  claim blocks which may be contended
+- In the worst case, we need to search the whole region map (64KiB for 256GiB)
+  linearly. At what point will direct OS calls be faster? Is there a way to
+  do this better without adding too much complexity?
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include <string.h>  // memset
+
+#include "bitmap.h"
+
+// Internal raw OS interface
+size_t  _mi_os_large_page_size(void);
+bool    _mi_os_protect(void* addr, size_t size);
+bool    _mi_os_unprotect(void* addr, size_t size);
+bool    _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+bool    _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
+bool    _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
+bool    _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+
+// arena.c
+void    _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats);
+void*   _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+void*   _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+
+
+
+// Constants
+#if (MI_INTPTR_SIZE==8)
+#define MI_HEAP_REGION_MAX_SIZE    (256 * MI_GiB)  // 64KiB for the region map 
+#elif (MI_INTPTR_SIZE==4)
+#define MI_HEAP_REGION_MAX_SIZE    (3 * MI_GiB)    // ~1.5KiB for the region map
+#else
+#error "define the maximum heap space allowed for regions on this platform"
+#endif
+
+#define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE
+
+#define MI_REGION_MAX_BLOCKS      MI_BITMAP_FIELD_BITS
+#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB  (64MiB on 32 bits)
+#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)  // 1024  (48 on 32 bits)
+#define MI_REGION_MAX_OBJ_BLOCKS  (MI_REGION_MAX_BLOCKS/4)                    // 64MiB
+#define MI_REGION_MAX_OBJ_SIZE    (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)  
+
+// Region info 
+typedef union mi_region_info_u {
+  size_t value;      
+  struct {
+    bool  valid;        // initialized?
+    bool  is_large:1;   // allocated in fixed large/huge OS pages
+    bool  is_pinned:1;  // pinned memory cannot be decommitted
+    short numa_node;    // the associated NUMA node (where -1 means no associated node)
+  } x;
+} mi_region_info_t;
+
+
+// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
+// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
+typedef struct mem_region_s {
+  _Atomic(size_t)           info;        // mi_region_info_t.value
+  _Atomic(void*)            start;       // start of the memory area 
+  mi_bitmap_field_t         in_use;      // bit per in-use block
+  mi_bitmap_field_t         dirty;       // track if non-zero per block
+  mi_bitmap_field_t         commit;      // track if committed per block
+  mi_bitmap_field_t         reset;       // track if reset per block
+  _Atomic(size_t)           arena_memid; // if allocated from a (huge page) arena
+  size_t                    padding;     // round to 8 fields
+} mem_region_t;
+
+// The region map
+static mem_region_t regions[MI_REGION_MAX];
+
+// Allocated regions
+static _Atomic(size_t) regions_count; // = 0;        
+
+
+/* ----------------------------------------------------------------------------
+Utility functions
+-----------------------------------------------------------------------------*/
+
+// Blocks (of 4MiB) needed for the given size.
+static size_t mi_region_block_count(size_t size) {
+  return _mi_divide_up(size, MI_SEGMENT_SIZE);
+}
+
+/*
+// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
+static size_t mi_good_commit_size(size_t size) {
+  if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
+  return _mi_align_up(size, _mi_os_large_page_size());
+}
+*/
+
+// Return if a pointer points into a region reserved by us.
+bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+  if (p==NULL) return false;
+  size_t count = mi_atomic_load_relaxed(&regions_count);
+  for (size_t i = 0; i < count; i++) {
+    uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, &regions[i].start);
+    if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
+  }
+  return false;
+}
+
+
+static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
+  uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start);
+  mi_assert_internal(start != NULL);
+  return (start + (bit_idx * MI_SEGMENT_SIZE));  
+}
+
+static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
+  mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
+  size_t idx = region - regions;
+  mi_assert_internal(&regions[idx] == region);
+  return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
+}
+
+static size_t mi_memid_create_from_arena(size_t arena_memid) {
+  return (arena_memid << 1) | 1;
+}
+
+
+static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
+  if ((id&1)==1) {
+    if (arena_memid != NULL) *arena_memid = (id>>1);
+    return true;
+  }
+  else {
+    size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
+    *bit_idx   = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
+    *region    = &regions[idx];
+    return false;
+  }
+}
+
+
+/* ----------------------------------------------------------------------------
+  Allocate a region from the OS (or an arena)
+-----------------------------------------------------------------------------*/
+
+static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
+{
+  // not out of regions yet?
+  if (mi_atomic_load_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false;
+
+  // try to allocate a fresh region from the OS
+  bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
+  bool region_large = (commit && allow_large);
+  bool is_zero = false;
+  bool is_pinned = false;
+  size_t arena_memid = 0;
+  void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_pinned, &is_zero, &arena_memid, tld);
+  if (start == NULL) return false;
+  mi_assert_internal(!(region_large && !allow_large));
+  mi_assert_internal(!region_large || region_commit);
+
+  // claim a fresh slot
+  const size_t idx = mi_atomic_increment_acq_rel(&regions_count);
+  if (idx >= MI_REGION_MAX) {
+    mi_atomic_decrement_acq_rel(&regions_count);
+    _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats);
+    _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB));
+    return false;
+  }
+
+  // allocated, initialize and claim the initial blocks
+  mem_region_t* r = &regions[idx];
+  r->arena_memid  = arena_memid;
+  mi_atomic_store_release(&r->in_use, (size_t)0);
+  mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
+  mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
+  mi_atomic_store_release(&r->reset, (size_t)0);
+  *bit_idx = 0;
+  _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
+  mi_atomic_store_ptr_release(void,&r->start, start);
+
+  // and share it 
+  mi_region_info_t info;
+  info.value = 0;                        // initialize the full union to zero
+  info.x.valid = true;
+  info.x.is_large = region_large;
+  info.x.is_pinned = is_pinned;
+  info.x.numa_node = (short)_mi_os_numa_node(tld);
+  mi_atomic_store_release(&r->info, info.value); // now make it available to others
+  *region = r;
+  return true;
+}
+
+/* ----------------------------------------------------------------------------
+  Try to claim blocks in suitable regions
+-----------------------------------------------------------------------------*/
+
+static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
+  // initialized at all?
+  mi_region_info_t info;
+  info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info);
+  if (info.value==0) return false;
+
+  // numa correct
+  if (numa_node >= 0) {  // use negative numa node to always succeed
+    int rnode = info.x.numa_node;
+    if (rnode >= 0 && rnode != numa_node) return false;
+  }
+
+  // check allow-large
+  if (!allow_large && info.x.is_large) return false;
+
+  return true;
+}
+
+
+static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
+{
+  // try all regions for a free slot  
+  const size_t count = mi_atomic_load_relaxed(&regions_count); // monotonic, so ok to be relaxed
+  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
+  for (size_t visited = 0; visited < count; visited++, idx++) {
+    if (idx >= count) idx = 0;  // wrap around
+    mem_region_t* r = &regions[idx];
+    // if this region suits our demand (numa node matches, large OS page matches)
+    if (mi_region_is_suitable(r, numa_node, allow_large)) {
+      // then try to atomically claim a segment(s) in this region
+      if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
+        tld->region_idx = idx;    // remember the last found position
+        *region = r;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+{
+  mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
+  mem_region_t* region;
+  mi_bitmap_index_t bit_idx;
+  const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
+  // try to claim in existing regions
+  if (!mi_region_try_claim(numa_node, blocks, *large, &region, &bit_idx, tld)) {
+    // otherwise try to allocate a fresh region and claim in there
+    if (!mi_region_try_alloc_os(blocks, *commit, *large, &region, &bit_idx, tld)) {
+      // out of regions or memory
+      return NULL;
+    }
+  }
+  
+  // ------------------------------------------------
+  // found a region and claimed `blocks` at `bit_idx`, initialize them now
+  mi_assert_internal(region != NULL);
+  mi_assert_internal(_mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
+
+  mi_region_info_t info;
+  info.value = mi_atomic_load_acquire(&region->info);
+  uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&region->start);
+  mi_assert_internal(!(info.x.is_large && !*large));
+  mi_assert_internal(start != NULL);
+
+  *is_zero   = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);  
+  *large     = info.x.is_large;
+  *is_pinned = info.x.is_pinned;
+  *memid     = mi_memid_create(region, bit_idx);
+  void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
+
+  // commit
+  if (*commit) {
+    // ensure commit
+    bool any_uncommitted;
+    _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted);
+    if (any_uncommitted) {
+      mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
+      bool commit_zero = false;
+      if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) {
+        // failed to commit! unclaim and return
+        mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
+        return NULL;
+      }
+      if (commit_zero) *is_zero = true;      
+    }
+  }
+  else {
+    // no need to commit, but check if already fully committed
+    *commit = _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
+  }  
+  mi_assert_internal(!*commit || _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));
+
+  // unreset reset blocks
+  if (_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
+    // some blocks are still reset
+    mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
+    mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); 
+    mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
+    if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
+      bool reset_zero = false;
+      _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
+      if (reset_zero) *is_zero = true;
+    }
+  }
+  mi_assert_internal(!_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));
+  
+  #if (MI_DEBUG>=2)
+  if (*commit) { ((uint8_t*)p)[0] = 0; }
+  #endif
+  
+  // and return the allocation  
+  mi_assert_internal(p != NULL);  
+  return p;
+}
+
+
+/* ----------------------------------------------------------------------------
+ Allocation
+-----------------------------------------------------------------------------*/
+
+// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
+// (`id` is abstract, but for a region allocation `id = (idx*MI_BITMAP_FIELD_BITS + bit_idx) << 1`; an odd `id` denotes a direct arena allocation)
+void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+{
+  mi_assert_internal(memid != NULL && tld != NULL);
+  mi_assert_internal(size > 0);
+  *memid = 0;
+  *is_zero = false;
+  *is_pinned = false;
+  bool default_large = false;
+  if (large==NULL) large = &default_large;  // ensure `large != NULL`  
+  if (size == 0) return NULL;
+  size = _mi_align_up(size, _mi_os_page_size());
+
+  // allocate from regions if possible
+  void* p = NULL;
+  size_t arena_memid;
+  const size_t blocks = mi_region_block_count(size);
+  if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
+    p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);    
+    if (p == NULL) {
+      _mi_warning_message("unable to allocate from region: size %zu\n", size);
+    }
+  }
+  if (p == NULL) {
+    // and otherwise fall back to the OS
+    p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, &arena_memid, tld);
+    *memid = mi_memid_create_from_arena(arena_memid);
+  }
+
+  if (p != NULL) {
+    mi_assert_internal((uintptr_t)p % alignment == 0);
+#if (MI_DEBUG>=2)
+    if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed
+#endif
+  }
+  return p;
+}
+
+
+
+/* ----------------------------------------------------------------------------
+Free
+-----------------------------------------------------------------------------*/
+
+// Free previously allocated memory with a given id.
+void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
+  mi_assert_internal(size > 0 && tld != NULL);
+  if (p==NULL) return;
+  if (size==0) return;
+  size = _mi_align_up(size, _mi_os_page_size());
+  
+  size_t arena_memid = 0;
+  mi_bitmap_index_t bit_idx;
+  mem_region_t* region;
+  if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
+   // was a direct arena allocation, pass through
+    _mi_arena_free(p, size, arena_memid, full_commit, tld->stats);
+  }
+  else {
+    // allocated in a region
+    mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
+    const size_t blocks = mi_region_block_count(size);
+    mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
+    mi_region_info_t info;
+    info.value = mi_atomic_load_acquire(&region->info);
+    mi_assert_internal(info.value != 0);
+    void* blocks_start = mi_region_blocks_start(region, bit_idx);
+    mi_assert_internal(blocks_start == p); // not a pointer in our area?
+    mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
+    if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
+
+    // committed?
+    if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
+      _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL);
+    }
+
+    if (any_reset) {
+      // set the is_reset bits if any pages were reset
+      _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL);
+    }
+
+    // reset the blocks to reduce the working set.
+    if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) 
+       && (mi_option_is_enabled(mi_option_eager_commit) ||
+           mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead            
+    {
+      bool any_unreset;
+      _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
+      if (any_unreset) {
+        _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
+        _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
+      }
+    }    
+
+    // and unclaim
+    bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
+    mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed);
+  }
+}
+
+
+/* ----------------------------------------------------------------------------
+  collection
+-----------------------------------------------------------------------------*/
+void _mi_mem_collect(mi_os_tld_t* tld) {
+  // free every region that has no segments in use.
+  size_t rcount = mi_atomic_load_relaxed(&regions_count);
+  for (size_t i = 0; i < rcount; i++) {
+    mem_region_t* region = &regions[i];
+    if (mi_atomic_load_relaxed(&region->info) != 0) {
+      // if no segments used, try to claim the whole region
+      size_t m = mi_atomic_load_relaxed(&region->in_use);
+      while (m == 0 && !mi_atomic_cas_weak_release(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
+      if (m == 0) {
+        // on success, free the whole region
+        uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&regions[i].start);
+        size_t arena_memid = mi_atomic_load_relaxed(&regions[i].arena_memid);
+        size_t commit = mi_atomic_load_relaxed(&regions[i].commit);
+        memset((void*)&regions[i], 0, sizeof(mem_region_t));  // cast to void* to avoid atomic warning
+        // and release the whole region
+        mi_atomic_store_release(&region->info, (size_t)0);
+        if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {         
+          _mi_abandoned_await_readers(); // ensure no pending reads
+          _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats);
+        }
+      }
+    }
+  }
+}
+
+
+/* ----------------------------------------------------------------------------
+  Other
+-----------------------------------------------------------------------------*/
+
+bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
+  return _mi_os_reset(p, size, tld->stats);
+}
+
+bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
+  return _mi_os_unreset(p, size, is_zero, tld->stats);
+}
+
+bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
+  return _mi_os_commit(p, size, is_zero, tld->stats);
+}
+
+bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
+  return _mi_os_decommit(p, size, tld->stats);
+}
+
+bool _mi_mem_protect(void* p, size_t size) {
+  return _mi_os_protect(p, size);
+}
+
+bool _mi_mem_unprotect(void* p, size_t size) {
+  return _mi_os_unprotect(p, size);
+}
diff --git a/src/segment-cache.c b/src/segment-cache.c
index 94862cc1..d773658d 100644
--- a/src/segment-cache.c
+++ b/src/segment-cache.c
@@ -117,7 +117,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo
 
 static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
 {
-  UNUSED(tld);
+  MI_UNUSED(tld);
   mi_msecs_t now = _mi_clock_now();
   size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX);            // random start
   size_t purged = 0;
diff --git a/src/segment.c b/src/segment.c
index d953438a..c164eb30 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -1097,13 +1097,13 @@ static mi_decl_cache_align _Atomic(mi_segment_t*)       abandoned_visited; // =
 static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned;         // = NULL
 
 // Maintain these for debug purposes (these counts may be a bit off)
-static mi_decl_cache_align _Atomic(uintptr_t)           abandoned_count; 
-static mi_decl_cache_align _Atomic(uintptr_t)           abandoned_visited_count;
+static mi_decl_cache_align _Atomic(size_t)           abandoned_count; 
+static mi_decl_cache_align _Atomic(size_t)           abandoned_visited_count;
 
 // We also maintain a count of current readers of the abandoned list
 // in order to prevent resetting/decommitting segment memory if it might
 // still be read.
-static mi_decl_cache_align _Atomic(uintptr_t)           abandoned_readers; // = 0
+static mi_decl_cache_align _Atomic(size_t)           abandoned_readers; // = 0
 
 // Push on the visited list
 static void mi_abandoned_visited_push(mi_segment_t* segment) {
@@ -1132,7 +1132,7 @@ static bool mi_abandoned_visited_revisit(void)
   mi_tagged_segment_t afirst;
   mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
   if (mi_tagged_segment_ptr(ts)==NULL) {
-    uintptr_t count = mi_atomic_load_relaxed(&abandoned_visited_count);
+    size_t count = mi_atomic_load_relaxed(&abandoned_visited_count);
     afirst = mi_tagged_segment(first, ts);
     if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) {
       mi_atomic_add_relaxed(&abandoned_count, count);
@@ -1151,7 +1151,7 @@ static bool mi_abandoned_visited_revisit(void)
   // and atomically prepend to the abandoned list
   // (no need to increase the readers as we don't access the abandoned segments)
   mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned);
-  uintptr_t count;
+  size_t count;
   do {
     count = mi_atomic_load_relaxed(&abandoned_visited_count);
     mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
@@ -1180,7 +1180,7 @@ static void mi_abandoned_push(mi_segment_t* segment) {
 // Wait until there are no more pending reads on segments that used to be in the abandoned list
 // called for example from `arena.c` before decommitting
 void _mi_abandoned_await_readers(void) {
-  uintptr_t n;
+  size_t n;
   do {
     n = mi_atomic_load_acquire(&abandoned_readers);
     if (n != 0) mi_atomic_yield();
@@ -1527,8 +1527,8 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
 
   // claim it and free
   mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized.
-  // paranoia: if this is the last reference, the cas should always succeed
-  uintptr_t expected_tid = 0;
+  // paranoia: if this is the last reference, the cas should always succeed
+  size_t expected_tid = 0;
   if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) {
     mi_block_set_next(page, block, page->free);
     page->free = block;