merge from dev3

2025-09-17 11:44:47 +03:00 · 2024-12-25 11:47:54 -08:00 · 2024-12-25 11:47:54 -08:00 · b5c4a3c6e7
commit b5c4a3c6e7
parent e64d6fcc47 8339cefdeb
7 changed files with 70 additions and 55 deletions
--- a/src/arena.c
+++ b/src/arena.c
@ -213,10 +213,6 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
      // now actually commit
      bool commit_zero = false;
      if (!_mi_os_commit_ex(p, mi_size_of_slices(slice_count), &commit_zero, mi_size_of_slices(slice_count - already_committed_count))) {
-        // failed to commit (todo: give warning?)
-        if (already_committed_count > 0) {
-          mi_subproc_stat_increase(arena->subproc, committed, mi_size_of_slices(already_committed_count));
-        }
        memid->initially_committed = false;
      }
      else {
@ -250,7 +246,15 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
  }
  else {
    // no need to commit, but check if already fully committed
+    // commit requested, but the range may not be committed as a whole: ensure it is committed now
    memid->initially_committed = mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count);
+    if (!memid->initially_committed) {
+      // partly committed.. adjust stats
+      size_t already_committed_count = 0;
+      mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count);
+      mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count);
+      mi_os_stat_decrease(committed, mi_size_of_slices(already_committed_count));
+    }
  }

  mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count));
@ -308,7 +312,9 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_
  // on an OS with overcommit (Linux) we don't count the commit yet as it is on-demand. Once a slice
  // is actually allocated for the first time it will be counted.
  const bool adjust = (overcommit && arena_commit);
-  if (adjust) { mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve, true /* on alloc */); }
+  if (adjust) {
+    mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve, true /* on alloc */);
+  }
  // and try to reserve the arena
  int err = mi_reserve_os_memory_ex2(subproc, arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id);
  if (err != 0) {
@ -563,7 +569,6 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_
      _mi_page_free_collect(page, false);  // update `used` count
      mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count));
      mi_assert_internal(page->slice_committed > 0 || mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
-      mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count));
      mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));
      mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
      mi_assert_internal(_mi_ptr_page(page)==page);
@ -723,7 +728,7 @@ static mi_page_t* mi_arenas_page_regular_alloc(mi_heap_t* heap, size_t slice_cou
  // 2. find a free block, potentially allocating a new arena
  const long commit_on_demand = mi_option_get(mi_option_page_commit_on_demand);
  const bool commit = (slice_count <= mi_slice_count_of_size(MI_PAGE_MIN_COMMIT_SIZE) ||  // always commit small pages
-                       (commit_on_demand == 2 && _mi_os_has_overcommit()) || (commit_on_demand == 1));
+                       (commit_on_demand == 2 && _mi_os_has_overcommit()) || (commit_on_demand == 0));
  page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq, commit);
  if (page != NULL) {
    mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count);
@ -827,6 +832,7 @@ void _mi_arenas_page_free(mi_page_t* page) {
      const size_t total_slices = page->slice_committed / MI_ARENA_SLICE_SIZE;  // conservative
      //mi_assert_internal(mi_bitmap_is_clearN(arena->slices_committed, page->memid.mem.arena.slice_index, total_slices));
      mi_assert_internal(page->memid.mem.arena.slice_count >= total_slices);
+      mi_assert_internal(total_slices > 0);
      if (total_slices > 0) {
        mi_bitmap_setN(arena->slices_committed, page->memid.mem.arena.slice_index, total_slices, NULL);
      }
@ -1434,14 +1440,14 @@ void mi_debug_show_arenas(bool show_pages) mi_attr_noexcept {
  mi_subproc_t* subproc = _mi_subproc();
  size_t max_arenas = mi_arenas_get_count(subproc);
  //size_t free_total = 0;
-  size_t slice_total = 0;
+  //size_t slice_total = 0;
  //size_t abandoned_total = 0;
  size_t page_total = 0;
  for (size_t i = 0; i < max_arenas; i++) {
    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]);
    if (arena == NULL) break;
    mi_assert(arena->subproc == subproc);
-    slice_total += arena->slice_count;
+    // slice_total += arena->slice_count;
    _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""), arena->subproc);
    //if (show_inuse) {
    //  free_total += mi_debug_show_bbitmap("in-use slices", arena->slice_count, arena->slices_free, true, NULL);
--- a/src/options.c
+++ b/src/options.c
@ -144,7 +144,7 @@ static mi_option_desc_t options[_mi_option_last] =
 #else
  { 1, UNINIT, MI_OPTION(eager_commit_delay) },         // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
 #endif
-  { 2500,UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) },  // purge delay in milli-seconds
+  { 0,   UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) },  // purge delay in milli-seconds
  { 0,   UNINIT, MI_OPTION(use_numa_nodes) },           // 0 = use available numa nodes, otherwise use at most N nodes.
  { 0,   UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) },           // 1 = do not use OS memory for allocation (but only reserved arenas)
  { 100, UNINIT, MI_OPTION(os_tag) },                   // only apple specific for now but might serve more or less related purpose
--- a/src/os.c
+++ b/src/os.c
@ -143,22 +143,21 @@ bool _mi_os_secure_guard_page_reset_before(void* addr) {
  return _mi_os_secure_guard_page_reset_at((uint8_t*)addr - _mi_os_secure_guard_page_size());
 }

-
 /* -----------------------------------------------------------
  Free memory
 -------------------------------------------------------------- */

 static void mi_os_free_huge_os_pages(void* p, size_t size);

-static void mi_os_prim_free(void* addr, size_t size, bool still_committed) {
+static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) {
  mi_assert_internal((size % _mi_os_page_size()) == 0);
  if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
  int err = _mi_prim_free(addr, size);
  if (err != 0) {
    _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
  }
-  if (still_committed) {
-    mi_os_stat_decrease(committed, size);
+  if (commit_size > 0) {
+    mi_os_stat_decrease(committed, commit_size);
  }
  mi_os_stat_decrease(reserved, size);
 }
@ -167,13 +166,19 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
  if (mi_memkind_is_os(memid.memkind)) {
    size_t csize = memid.mem.os.size;
    if (csize==0) { _mi_os_good_alloc_size(size); }
+    size_t commit_size = (still_committed ? csize : 0);
    void* base = addr;
    // different base? (due to alignment)
    if (memid.mem.os.base != base) {
      mi_assert(memid.mem.os.base <= addr);
-      // mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
      base = memid.mem.os.base;
-      if (memid.mem.os.size==0) { csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); }
+      const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base;
+      if (memid.mem.os.size==0) {
+        csize += diff;
+      }
+      if (still_committed) {
+        commit_size -= diff;  // the (addr-base) part was already un-committed
+      }
    }
    // free it
    if (memid.memkind == MI_MEM_OS_HUGE) {
@ -181,7 +186,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
      mi_os_free_huge_os_pages(base, csize);
    }
    else {
-      mi_os_prim_free(base, csize, still_committed);
+      mi_os_prim_free(base, csize, (still_committed ? commit_size : 0));
    }
  }
  else {
@ -266,7 +271,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
      _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
    }
    #endif
-    if (p != NULL) { mi_os_prim_free(p, size, commit); }
+    if (p != NULL) { mi_os_prim_free(p, size, (commit ? size : 0)); }
    if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
    const size_t over_size = size + alignment;

@ -297,8 +302,8 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
      size_t mid_size = _mi_align_up(size, _mi_os_page_size());
      size_t post_size = over_size - pre_size - mid_size;
      mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size);
-      if (pre_size > 0)  { mi_os_prim_free(p, pre_size, commit); }
-      if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit); }
+      if (pre_size > 0)  { mi_os_prim_free(p, pre_size, (commit ? pre_size : 0)); }
+      if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, (commit ? post_size : 0)); }
      // we can return the aligned pointer on `mmap` systems
      p = aligned_p;
      *base = aligned_p; // since we freed the pre part, `*base == p`.
@ -454,9 +459,9 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero) {
  return _mi_os_commit_ex(addr, size, is_zero, size);
 }

-static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, size_t stats_size) {
+static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, size_t stat_size) {
  mi_assert_internal(needs_recommit!=NULL);
-  mi_os_stat_decrease(committed, stats_size);
+  mi_os_stat_decrease(committed, stat_size);

  // page align
  size_t csize;
@ -505,7 +510,7 @@ bool _mi_os_reset(void* addr, size_t size) {

 // either resets or decommits memory, returns true if the memory needs
 // to be recommitted if it is to be re-used later on.
-bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stats_size)
+bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size)
 {
  if (mi_option_get(mi_option_purge_delay) < 0) return false;  // is purging allowed?
  mi_os_stat_counter_increase(purge_calls, 1);
@ -515,7 +520,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stats_size)
    !_mi_preloading())                                     // don't decommit during preloading (unsafe)
  {
    bool needs_recommit = true;
-    mi_os_decommit_ex(p, size, &needs_recommit, stats_size);
+    mi_os_decommit_ex(p, size, &needs_recommit, stat_size);
    return needs_recommit;
  }
  else {
@ -636,7 +641,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
      // no success, issue a warning and break
      if (p != NULL) {
        _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
-        mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true);
+        mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, MI_HUGE_OS_PAGE_SIZE);
      }
      break;
    }
@ -682,7 +687,7 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
  if (p==NULL || size==0) return;
  uint8_t* base = (uint8_t*)p;
  while (size >= MI_HUGE_OS_PAGE_SIZE) {
-    mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true);
+    mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, MI_HUGE_OS_PAGE_SIZE);
    size -= MI_HUGE_OS_PAGE_SIZE;
    base += MI_HUGE_OS_PAGE_SIZE;
  }
--- a/src/page-map.c
+++ b/src/page-map.c
@ -160,6 +160,7 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att
 #else

 // A 2-level page map
+#define MI_PAGE_MAP_SUB_SIZE    (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))

 mi_decl_cache_align mi_page_t*** _mi_page_map;
 static void*        mi_page_map_max_address;
@ -167,6 +168,7 @@ static mi_memid_t   mi_page_map_memid;

 static _Atomic(mi_bfield_t)  mi_page_map_commit; 

+static mi_page_t** mi_page_map_ensure_committed(size_t idx);
 static mi_page_t** mi_page_map_ensure_at(size_t idx);
 static inline void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);

@ -200,16 +202,17 @@ bool _mi_page_map_init(void) {
  }
  mi_atomic_store_release(&mi_page_map_commit, (commit ? ~MI_ZU(0) : MI_ZU(0)));

-  // commit the first part so NULL pointers get resolved without an access violation
-  mi_page_map_ensure_at(0);
-  
-  // note: for the NULL range we only commit one OS page
-  // mi_page_map_set_range(NULL, 0, 0, 1);
-  _mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size);
+  // note: for the NULL range we only commit one OS page (in the map and sub)
  if (!mi_page_map_memid.initially_committed) {
-    _mi_os_commit(_mi_page_map[0], os_page_size, NULL);
+    _mi_os_commit(&_mi_page_map[0], os_page_size, NULL);  // commit first part of the map
+  }
+  _mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size);  // we reserved 2 subs at the end already
+  if (!mi_page_map_memid.initially_committed) {
+    _mi_os_commit(_mi_page_map[0], os_page_size, NULL);   // only first OS page
+  }
+  if (!mi_page_map_memid.initially_zero) {
+    _mi_page_map[0][0] = NULL;
  }
-  _mi_page_map[0][0] = NULL;

  mi_assert_internal(_mi_ptr_page(NULL)==NULL);
  return true;
--- a/test/test-stress.c
+++ b/test/test-stress.c
@ -48,7 +48,7 @@ static int ITER    = 20;
 static int THREADS = 32;
 static int SCALE   = 50;
 static int ITER    = 50;
-#elif 1
+#elif 0
 static int THREADS = 32;
 static int SCALE   = 25;
 static int ITER    = 50;
@ -82,6 +82,7 @@ static bool   main_participates = false;       // main thread participates as a
 #define custom_calloc(n,s)    mi_calloc(n,s)
 #define custom_realloc(p,s)   mi_realloc(p,s)
 #define custom_free(p)        mi_free(p)
+
 #ifndef NDEBUG
 #define xHEAP_WALK             // walk the heap objects?
 #endif