From d36d04b4a6e5ada99fa36447332e5d7d3b1d33be Mon Sep 17 00:00:00 2001
From: daan
Date: Thu, 31 Oct 2019 15:35:10 -0700
Subject: [PATCH] add arena for huge page management

---
 ide/vs2019/mimalloc-override.vcxproj |   1 +
 ide/vs2019/mimalloc.vcxproj          |   1 +
 include/mimalloc-internal.h          |   1 +
 src/arena.c                          | 369 +++++++++++++++++++++++++++
 src/memory.c                         |  80 ++++--
 src/os.c                             |   4 +-
 6 files changed, 435 insertions(+), 21 deletions(-)
 create mode 100644 src/arena.c

diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj
index 96a8924f..09fd37fb 100644
--- a/ide/vs2019/mimalloc-override.vcxproj
+++ b/ide/vs2019/mimalloc-override.vcxproj
@@ -231,6 +231,7 @@
+    <ClCompile Include="..\..\src\arena.c" />
diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj
index 28e96d71..1fabff5e 100644
--- a/ide/vs2019/mimalloc.vcxproj
+++ b/ide/vs2019/mimalloc.vcxproj
@@ -217,6 +217,7 @@
+    <ClCompile Include="..\..\src\arena.c" />
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index ccf12a06..2b881ac9 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -57,6 +57,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);  // to free thread local data
 size_t _mi_os_good_alloc_size(size_t size);
+
 // memory.c
 void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld);
 void  _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats);
diff --git a/src/arena.c b/src/arena.c
new file mode 100644
index 00000000..5f33965a
--- /dev/null
+++ b/src/arena.c
@@ -0,0 +1,369 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include <string.h>  // memset
+
+// os.c
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
+void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
+int   _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
+
+/* -----------------------------------------------------------
+  Arena allocation
+----------------------------------------------------------- */
+
+#define MI_SEGMENT_ALIGN     MI_SEGMENT_SIZE
+#define MI_ARENA_BLOCK_SIZE  (4*MI_SEGMENT_ALIGN)  // 16MiB
+#define MI_MAX_ARENAS        (64)
+
+// Block info: bit 0 contains the `in_use` bit, the upper bits the
+// size in count of arena blocks.
+typedef uintptr_t mi_block_info_t;
+
+// A memory arena descriptor
+typedef struct mi_arena_s {
+  uint8_t* start;         // the start of the memory area
+  size_t   block_count;   // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
+  bool     is_zero_init;  // is the arena zero initialized?
+  bool     is_large;      // large OS page allocated
+  _Atomic(uintptr_t)       block_bottom;  // optimization to start the search for free blocks
+  _Atomic(mi_block_info_t) blocks[1];     // `block_count` block info's
+} mi_arena_t;
+
+
+// The available arenas
+static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
+static _Atomic(uintptr_t)   mi_arena_count; // = 0
+
+
+/* -----------------------------------------------------------
+  Arena allocations get a memory id where the lower 8 bits are
+  the arena index +1, and the upper bits the block index.
+----------------------------------------------------------- */
+
+// Use `0` as a special id for direct OS allocated memory.
+#define MI_MEMID_OS   0
+
+static size_t mi_memid_create(size_t arena_index, size_t block_index) {
+  mi_assert_internal(arena_index < 0xFE);
+  return ((block_index << 8) | ((arena_index+1) & 0xFF));
+}
+
+static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) {
+  mi_assert_internal(memid != MI_MEMID_OS);
+  *arena_index = (memid & 0xFF) - 1;
+  *block_index = (memid >> 8);
+}
+
+/* -----------------------------------------------------------
+  Block info
+----------------------------------------------------------- */
+
+static bool mi_block_is_in_use(mi_block_info_t info) {
+  return ((info&1) != 0);
+}
+
+static size_t mi_block_count(mi_block_info_t info) {
+  return (info>>1);
+}
+
+static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) {
+  return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0));
+}
+
+
+/* -----------------------------------------------------------
+  Thread safe allocation in an arena
+----------------------------------------------------------- */
+
+static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index)
+{
+  // Scan linearly through all block info's
+  // Skipping used ranges, coalescing free ranges on demand.
+  mi_assert_internal(needed_bcount > 0);
+  mi_assert_internal(start_idx <= arena->block_count);
+  mi_assert_internal(end_idx <= arena->block_count);
+  _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx];
+  _Atomic(mi_block_info_t)* end   = &arena->blocks[end_idx];
+  while (block < end) {
+    mi_block_info_t binfo = mi_atomic_read_relaxed(block);
+    size_t bcount = mi_block_count(binfo);
+    if (mi_block_is_in_use(binfo)) {
+      // in-use, skip ahead
+      mi_assert_internal(bcount > 0);
+      block += bcount;
+    }
+    else {
+      // free blocks
+      if (bcount==0) {
+        // optimization:
+        // use 0 initialized blocks at the end, to use single atomic operation
+        // initially to reduce contention (as we don't need to split)
+        if (block + needed_bcount > end) {
+          return NULL;  // does not fit
+        }
+        else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) {
+          // ouch, someone else was quicker. Try again..
+          continue;
+        }
+        else {
+          // we got it: return a pointer to the claimed memory
+          ptrdiff_t idx = (block - arena->blocks);
+          *is_zero = arena->is_zero_init;
+          *block_index = idx;
+          return (arena->start + (idx*MI_ARENA_BLOCK_SIZE));
+        }
+      }
+
+      mi_assert_internal(bcount>0);
+      if (needed_bcount > bcount) {
+#if 0  // MI_NO_ARENA_COALESCE
+        block += bcount;  // too small, skip to the next range
+        continue;
+#else
+        // too small, try to coalesce
+        _Atomic(mi_block_info_t)* block_next = block + bcount;
+        if (block_next >= end) {
+          return NULL;  // does not fit
+        }
+        mi_block_info_t binfo_next = mi_atomic_read(block_next);
+        size_t bcount_next = mi_block_count(binfo_next);
+        if (mi_block_is_in_use(binfo_next)) {
+          // next block is in use, cannot coalesce
+          block += (bcount + bcount_next);  // skip ahead over both blocks
+        }
+        else {
+          // next block is free, try to coalesce
+          // first set the next one to being used to prevent dangling ranges
+          if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) {
+            // someone else got in before us.. try again
+            continue;
+          }
+          else {
+            if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) {  // use strong to increase success chance
+              // someone claimed/coalesced the block in the meantime
+              // first free the next block again..
+              bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next);  // must be strong
+              mi_assert(ok); UNUSED(ok);
+              // and try again
+              continue;
+            }
+            else {
+              // coalesced! try again
+              // todo: we could optimize here to immediately claim the block if the
+              // coalesced size is a fit instead of retrying. Keep it simple for now.
+              continue;
+            }
+          }
+        }
+#endif
+      }
+      else {  // needed_bcount <= bcount
+        mi_assert_internal(needed_bcount <= bcount);
+        // it fits, claim the whole block
+        if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) {
+          // ouch, someone else was quicker. Try again..
+          continue;
+        }
+        else {
+          // got it, now split off the needed part
+          if (needed_bcount < bcount) {
+            mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false));
+            mi_atomic_write(block, mi_block_info_create(needed_bcount, true));
+          }
+          // return a pointer to the claimed memory
+          ptrdiff_t idx = (block - arena->blocks);
+          *is_zero = false;
+          *block_index = idx;
+          return (arena->start + (idx*MI_ARENA_BLOCK_SIZE));
+        }
+      }
+    }
+  }
+  // no success
+  return NULL;
+}
+
+// Try to reduce search time by starting from bottom and wrap around.
+static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index)
+{
+  uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom);
+  void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index);
+  if (p == NULL && bottom > 0) {
+    // try again from the start
+    p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index);
+  }
+  if (p != NULL) {
+    mi_atomic_write(&arena->block_bottom, *block_index);
+  }
+  return p;
+}
+
+/* -----------------------------------------------------------
+  Arena Allocation
+----------------------------------------------------------- */
+
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) {
+  mi_assert_internal(memid != NULL && tld != NULL);
+  mi_assert_internal(size > 0);
+  *memid = MI_MEMID_OS;
+  *is_zero = false;
+  bool default_large = false;
+  if (large==NULL) large = &default_large;  // ensure `large != NULL`
+
+  // try to allocate in an arena if the alignment is small enough
+  // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`.
+  if (alignment <= MI_SEGMENT_ALIGN &&
+      size >= 3*(MI_ARENA_BLOCK_SIZE/4) &&  // > 12MiB (not more than 25% waste)
+      !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2))  // ! <16MiB - 24MiB>
+     )
+  {
+    size_t asize  = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
+    size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
+
+    mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
+    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
+      mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i]));
+      if (arena==NULL) break;
+      if (*large || !arena->is_large) {  // large OS pages allowed, or arena is not large OS pages
+        size_t block_index = SIZE_MAX;
+        void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index);
+        if (p != NULL) {
+          mi_assert_internal(block_index != SIZE_MAX);
+          #if MI_DEBUG>=1
+          _Atomic(mi_block_info_t)* block = &arena->blocks[block_index];
+          mi_block_info_t binfo = mi_atomic_read(block);
+          mi_assert_internal(mi_block_is_in_use(binfo));
+          mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size);
+          #endif
+          *memid = mi_memid_create(i, block_index);
+          *commit = true;  // TODO: support commit on demand?
+          *large = arena->is_large;
+          mi_assert_internal((uintptr_t)p % alignment == 0);
+          return p;
+        }
+      }
+    }
+  }
+
+  // fall back to the OS
+  *is_zero = true;
+  *memid = MI_MEMID_OS;
+  return _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
+}
+
+void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+{
+  return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld);
+}
+
+/* -----------------------------------------------------------
+  Arena free
+----------------------------------------------------------- */
+
+void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
+  mi_assert_internal(size > 0 && stats != NULL);
+  if (p==NULL) return;
+  if (size==0) return;
+  if (memid == MI_MEMID_OS) {
+    // was a direct OS allocation, pass through
+    _mi_os_free(p, size, stats);
+  }
+  else {
+    // allocated in an arena
+    size_t arena_idx;
+    size_t block_idx;
+    mi_memid_indices(memid, &arena_idx, &block_idx);
+    mi_assert_internal(arena_idx < MI_MAX_ARENAS);
+    mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx]));
+    mi_assert_internal(arena != NULL);
+    if (arena == NULL) {
+      _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
+      return;
+    }
+    mi_assert_internal(arena->block_count > block_idx);
+    if (arena->block_count <= block_idx) {
+      _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
+      return;
+    }
+    _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx];
+    mi_block_info_t binfo = mi_atomic_read_relaxed(block);
+    mi_assert_internal(mi_block_is_in_use(binfo));
+    mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size);
+    if (!mi_block_is_in_use(binfo)) {
+      _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size);
+      return;
+    };
+    bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo);
+    mi_assert_internal(ok);
+    if (!ok) {
+      _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo);
+    }
+    if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) {
+      mi_atomic_write(&arena->block_bottom, block_idx);
+    }
+  }
+}
+
+/* -----------------------------------------------------------
+  Add an arena.
+----------------------------------------------------------- */
+
+static bool mi_arena_add(mi_arena_t* arena) {
+  mi_assert_internal(arena != NULL);
+  mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0);
+  mi_assert_internal(arena->block_count > 0);
+  mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t)));
+
+  uintptr_t i = mi_atomic_addu(&mi_arena_count,1);
+  if (i >= MI_MAX_ARENAS) {
+    mi_atomic_subu(&mi_arena_count, 1);
+    return false;
+  }
+  mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena);
+  return true;
+}
+
+
+/* -----------------------------------------------------------
+  Reserve a huge page arena.
+  TODO: improve OS api to just reserve and claim a huge
+  page area at once, (and return the total size).
+----------------------------------------------------------- */
+
+#include <errno.h>
+
+int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
+  size_t pages_reserved_default = 0;
+  if (pages_reserved==NULL) pages_reserved = &pages_reserved_default;
+  int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved);
+  if (*pages_reserved==0) return err;
+  size_t hsize = (*pages_reserved) * GiB;
+  void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN);
+  mi_assert_internal(p != NULL);
+  if (p == NULL) return ENOMEM;
+  size_t bcount = hsize / MI_ARENA_BLOCK_SIZE;
+  size_t asize  = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t));  // one too much
+  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats);
+  if (arena == NULL) return ENOMEM;
+  arena->block_count = bcount;
+  arena->start = (uint8_t*)p;
+  arena->block_bottom = 0;
+  arena->is_large = true;
+  arena->is_zero_init = true;
+  memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t));
+  //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false));
+  mi_arena_add(arena);
+  return 0;
+}
diff --git a/src/memory.c b/src/memory.c
index dd03cf95..9ab7c850 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -50,6 +50,12 @@ void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* sta
 void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
 bool  _mi_os_is_huge_reserved(void* p);
 
+// arena.c
+void  _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
+void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+
+
 // Constants
 #if (MI_INTPTR_SIZE==8)
 #define MI_HEAP_REGION_MAX_SIZE    (256 * (1ULL << 30))  // 256GiB => 16KiB for the region map
@@ -87,6 +93,7 @@ typedef struct mem_region_s {
   volatile _Atomic(uintptr_t)        map;         // in-use bit per MI_SEGMENT_SIZE block
   volatile _Atomic(mi_region_info_t) info;        // start of virtual memory area, and flags
   volatile _Atomic(uintptr_t)        dirty_mask;  // bit per block if the contents are not zero'd
+  size_t arena_memid;
 } mem_region_t;
 
 
@@ -131,6 +138,30 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
 }
 
 
+static size_t mi_memid_create(size_t idx, size_t bitidx) {
+  return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1;
+}
+
+static size_t mi_memid_create_from_arena(size_t arena_memid) {
+  return (arena_memid << 1) | 1;
+}
+
+static bool mi_memid_is_arena(size_t id) {
+  return ((id&1)==1);
+}
+
+static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) {
+  if (mi_memid_is_arena(id)) {
+    *arena_memid = (id>>1);
+    return true;
+  }
+  else {
+    *idx = ((id>>1) / MI_REGION_MAP_BITS);
+    *bitidx = ((id>>1) % MI_REGION_MAP_BITS);
+    return false;
+  }
+}
+
+
 /* ----------------------------------------------------------------------------
 Commit from a region
 -----------------------------------------------------------------------------*/
@@ -153,6 +184,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 {
   bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit);
   bool region_large  = *allow_large;
+  size_t arena_memid = 0;
+  void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, is_zero, &arena_memid, tld);
+  /*
   void* start = NULL;
   if (region_large) {
     start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN);
@@ -161,6 +195,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
   if (start == NULL) {
     start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld);
   }
+  */
   mi_assert_internal(!(region_large && !*allow_large));
 
   if (start == NULL) {
@@ -176,6 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
   info = mi_region_info_create(start,region_large,region_commit);
   if (mi_atomic_cas_strong(&region->info, info, 0)) {
     // update the region count
+    region->arena_memid = arena_memid;
     mi_atomic_increment(&regions_count);
   }
   else {
@@ -183,6 +219,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
     // we assign it to a later slot instead (up to 4 tries).
     for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
       if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
+        regions[idx+i].arena_memid = arena_memid;
         mi_atomic_increment(&regions_count);
         start = NULL;
         break;
@@ -190,7 +227,8 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
     }
     if (start != NULL) {
       // free it if we didn't succeed to save it to some other region
-      _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
+      _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
+      // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
     }
     // and continue with the memory at our index
     info = mi_atomic_read(&region->info);
@@ -229,7 +267,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
   mi_assert_internal(blocks_start != NULL);
   *allow_large = region_is_large;
   *p  = blocks_start;
-  *id = (idx*MI_REGION_MAP_BITS) + bitidx;
+  *id = mi_memid_create(idx, bitidx);
   return true;
 }
 
@@ -269,7 +307,7 @@ static inline size_t mi_bsr(uintptr_t x) {
 
 // Allocate `blocks` in a `region` at `idx` of a given `size`.
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
-// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
+// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
 static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
@@ -366,15 +404,17 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
 {
   mi_assert_internal(id != NULL && tld != NULL);
   mi_assert_internal(size > 0);
-  *id = SIZE_MAX;
+  *id = 0;
   *is_zero = false;
   bool default_large = false;
   if (large==NULL) large = &default_large;  // ensure `large != NULL`
 
-  // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
+  // use direct OS allocation for huge blocks or alignment
   if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
-    *is_zero = true;
-    return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld);  // round up size
+    size_t arena_memid = 0;
+    void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld);  // round up size
+    *id = mi_memid_create_from_arena(arena_memid);
+    return p;
   }
 
   // always round size to OS page size multiple (so commit/decommit go over the entire range)
@@ -405,9 +445,10 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
 
   if (p == NULL) {
     // we could not find a place to allocate, fall back to the os directly
-    _mi_warning_message("unable to allocate from region: size %zu\n", size);
-    *is_zero = true;
-    p = _mi_os_alloc_aligned(size, alignment, commit, large, tld);
+    _mi_warning_message("unable to allocate from region: size %zu\n", size);
+    size_t arena_memid = 0;
+    p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
+    *id = mi_memid_create_from_arena(arena_memid);
   }
   else {
     tld->region_idx = idx;  // next start of search? currently not used as we use first-fit
@@ -428,18 +469,19 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
   mi_assert_internal(size > 0 && stats != NULL);
   if (p==NULL) return;
   if (size==0) return;
-  if (id == SIZE_MAX) {
-    // was a direct OS allocation, pass through
-    _mi_os_free(p, size, stats);
+  size_t arena_memid = 0;
+  size_t idx = 0;
+  size_t bitidx = 0;
+  if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) {
+    // was a direct arena allocation, pass through
+    _mi_arena_free(p, size, arena_memid, stats);
   }
   else {
     // allocated in a region
    mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return;
    // we can align the size up to page size (as we allocate that way too)
    // this ensures we fully commit/decommit/reset
-    size = _mi_align_up(size, _mi_os_page_size());
-    size_t idx = (id / MI_REGION_MAP_BITS);
-    size_t bitidx = (id % MI_REGION_MAP_BITS);
+    size = _mi_align_up(size, _mi_os_page_size());
    size_t blocks = mi_region_block_count(size);
    size_t mask = mi_region_block_mask(blocks, bitidx);
    mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
@@ -503,11 +545,11 @@ void _mi_mem_collect(mi_stats_t* stats) {
         m = mi_atomic_read_relaxed(&region->map);
       } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
       if (m == 0) {
-        // on success, free the whole region (unless it was huge reserved)
+        // on success, free the whole region
         bool is_eager_committed;
         void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
-        if (start != NULL && !_mi_os_is_huge_reserved(start)) {
-          _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats);
+        if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
+          _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats);
         }
         // and release
         mi_atomic_write(&region->info,0);
diff --git a/src/os.c b/src/os.c
index 8f5afc5b..85cd1a83 100644
--- a/src/os.c
+++ b/src/os.c
@@ -869,13 +869,13 @@ static void mi_os_free_huge_reserved() {
 */
 
 #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP)))
-int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
+int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
   UNUSED(pages); UNUSED(max_secs);
   if (pages_reserved != NULL) *pages_reserved = 0;
   return ENOMEM;
 }
 #else
-int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
+int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
 {
   if (pages_reserved != NULL) *pages_reserved = 0;
   if (max_secs==0) return ETIMEDOUT; // timeout
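
For reference, the two bit-packing schemes that arena.c introduces can be exercised on their own. The sketch below mirrors mi_memid_create/mi_memid_indices (block index in the upper bits, arena index plus one in the low 8 bits, with 0 reserved for direct OS memory) and mi_block_info_create/mi_block_count/mi_block_is_in_use (block count shifted left by one, in-use flag in bit 0); the helper names here are illustrative, not the patch's own.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uintptr_t block_info_t;

// memid: upper bits = block index, low 8 bits = arena index + 1 (0 means "direct OS allocation")
static size_t memid_create(size_t arena_index, size_t block_index) {
  return (block_index << 8) | ((arena_index + 1) & 0xFF);
}
static void memid_indices(size_t memid, size_t* arena_index, size_t* block_index) {
  *arena_index = (memid & 0xFF) - 1;
  *block_index = (memid >> 8);
}

// block info: bit 0 = in-use flag, remaining bits = size in arena blocks
static block_info_t block_info_create(size_t bcount, bool in_use) {
  return ((block_info_t)bcount << 1) | (in_use ? 1 : 0);
}
static size_t block_count(block_info_t info)  { return (info >> 1); }
static bool   block_in_use(block_info_t info) { return ((info & 1) != 0); }

int main(void) {
  size_t a, b;
  memid_indices(memid_create(3, 42), &a, &b);
  assert(a == 3 && b == 42);          // the encoding round-trips

  block_info_t bi = block_info_create(5, true);
  assert(block_count(bi) == 5 && block_in_use(bi));
  return 0;
}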
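memory.c layers a second tag on top of this: the low bit of the id handed back by _mi_mem_alloc_aligned distinguishes arena pass-through allocations ((arena_memid << 1) | 1) from region allocations (((idx*MI_REGION_MAP_BITS) + bitidx) << 1), which is how _mi_mem_free decides where to route the free. A small standalone sketch of that dispatch, with illustrative names and a placeholder for MI_REGION_MAP_BITS:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define REGION_MAP_BITS 64  // stand-in for MI_REGION_MAP_BITS (one map bit per block in a region)

static size_t region_id(size_t idx, size_t bitidx) { return ((idx*REGION_MAP_BITS) + bitidx) << 1; }
static size_t arena_id(size_t arena_memid)         { return (arena_memid << 1) | 1; }

// returns true when `id` refers to an arena allocation; otherwise fills the region indices
static bool id_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) {
  if ((id & 1) == 1) { *arena_memid = (id >> 1); return true; }
  *idx    = (id >> 1) / REGION_MAP_BITS;
  *bitidx = (id >> 1) % REGION_MAP_BITS;
  return false;
}

int main(void) {
  size_t idx, bitidx, am;
  assert(id_indices(arena_id(7), &idx, &bitidx, &am) && am == 7);
  assert(!id_indices(region_id(2, 5), &idx, &bitidx, &am) && idx == 2 && bitidx == 5);
  return 0;
}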
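With this patch, mi_reserve_huge_os_pages registers the reserved huge-page area as an arena, so sufficiently large, segment-aligned allocations can later be carved out of it instead of going to the OS. A possible way to exercise it from an application (a sketch only; the page count and timeout are arbitrary, and failure to reserve is not fatal):

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  size_t reserved = 0;
  // try to reserve 4 x 1GiB huge OS pages, waiting at most 10 seconds
  int err = mi_reserve_huge_os_pages(4, 10.0, &reserved);
  if (err != 0 || reserved == 0) {
    fprintf(stderr, "no huge pages reserved (err=%d)\n", err);
  }
  // an allocation in the arena's size range may now be served from the huge-page arena
  void* p = mi_malloc(32 * 1024 * 1024);
  mi_free(p);
  return 0;
}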