Merge branch 'dev3' of /Volumes/T9/dev/mimalloc into dev3

Daan 2025-03-05 10:00:59 -08:00
commit 38f59405e4
7 changed files with 29 additions and 29 deletions


@@ -160,7 +160,7 @@ bool _mi_os_secure_guard_page_reset_at(void* addr);
bool _mi_os_secure_guard_page_reset_before(void* addr);
int _mi_os_numa_node(void);
size_t _mi_os_numa_node_count(void);
int _mi_os_numa_node_count(void);
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid);
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid);


@@ -343,10 +343,10 @@ typedef struct mi_page_s {
// The max object sizes are checked to not waste more than 12.5% internally over the page sizes.
#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < ~8 KiB
#if MI_ENABLE_LARGE_PAGES
#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 64 KiB
#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < ~64 KiB
#define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/8) // <= 512KiB // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
#else
#define MI_MEDIUM_MAX_OBJ_SIZE (MI_MEDIUM_PAGE_SIZE/4) // <= 128 KiB
#define MI_MEDIUM_MAX_OBJ_SIZE (MI_MEDIUM_PAGE_SIZE/8) // <= 64 KiB
#define MI_LARGE_MAX_OBJ_SIZE MI_MEDIUM_MAX_OBJ_SIZE // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
#endif
#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)
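
As a quick check of the thresholds above, the following stand-alone sketch spells out the arithmetic, assuming the page sizes implied by the comments (64 KiB small, 512 KiB medium, 4 MiB large pages) and ignoring MI_PAGE_INFO_SIZE; the real constants live in the surrounding header and may differ.

#include <stdio.h>

// Hypothetical stand-ins for the real constants; the actual values come from mimalloc's headers.
#define PAGE_SMALL   (64u * 1024)         // assumed MI_SMALL_PAGE_SIZE
#define PAGE_MEDIUM  (512u * 1024)        // assumed MI_MEDIUM_PAGE_SIZE
#define PAGE_LARGE   (4u * 1024 * 1024)   // assumed MI_LARGE_PAGE_SIZE

int main(void) {
  // dividing by 8 keeps internal waste below 1/8 = 12.5% of a page
  printf("small  max obj ~ %u KiB\n", (PAGE_SMALL  / 8) / 1024);   // ~8 KiB
  printf("medium max obj ~ %u KiB\n", (PAGE_MEDIUM / 8) / 1024);   // ~64 KiB
  printf("large  max obj   %u KiB\n", (PAGE_LARGE  / 8) / 1024);   // 512 KiB
  // without MI_ENABLE_LARGE_PAGES the old /4 divisor allowed ~128 KiB medium objects;
  // the new /8 divisor brings that back to ~64 KiB for the same 12.5% waste bound
  printf("old medium cap    %u KiB\n", (PAGE_MEDIUM / 4) / 1024);  // ~128 KiB
  return 0;
}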


@@ -1524,17 +1524,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
if (pages == 0) return 0;
// pages per numa node
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) numa_count = 1;
int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) { numa_count = 1; }
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
// reserve evenly among numa nodes
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
if (numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
if ((size_t)numa_node < pages_mod) { node_pages++; }
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (err) return err;
if (pages < node_pages) {
pages = 0;
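
The loop above spreads `pages` over the NUMA nodes: every node gets pages / numa_count, and the first pages % numa_count nodes take one extra page. Below is a minimal sketch of just that distribution with made-up inputs; the call to mi_reserve_huge_os_pages_at is replaced by a printf.

#include <stdio.h>
#include <stddef.h>

int main(void) {
  size_t pages = 10;      // example: 10 huge pages requested
  int numa_count = 4;     // example: 4 NUMA nodes
  const size_t pages_per = pages / (size_t)numa_count;   // 2
  const size_t pages_mod = pages % (size_t)numa_count;   // 2
  for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
    size_t node_pages = pages_per;                        // can be 0
    if ((size_t)numa_node < pages_mod) { node_pages++; }  // first `pages_mod` nodes get one extra
    printf("node %d: reserve %zu page(s)\n", numa_node, node_pages);  // prints 3, 3, 2, 2
    pages = (pages < node_pages ? 0 : pages - node_pages);
  }
  return 0;
}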


@@ -1560,11 +1560,18 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]);
const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
size_t eidx = 0;
mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y) // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
mi_bfield_cycle_iterate(cmap_entry, tseq, cmap_entry_cycle, eidx, Y)
{
mi_assert_internal(eidx <= MI_BFIELD_BITS);
// don't search into non-accessed memory until we tried other size bins as well
if (bin < bbin && eidx >= cmap_entry_cycle) break;
// get the chunk idx
const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
mi_assert_internal(bin >= bbin || chunk_idx <= chunk_acc);
// only in the current size class!
const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
if ((mi_bbin_t)bin == chunk_bin || (bin == bbin && chunk_bin == MI_BBIN_NONE)) // only allow NONE at the final run
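
The condition at the end decides whether a chunk may serve the current search: either the chunk is already tagged with the requested size bin, or this is the final pass (bin == bbin) and the chunk is still untagged (MI_BBIN_NONE), matching the "only allow NONE at the final run" comment. A self-contained sketch of that predicate with a stand-in enum; the names below are illustrative, not the library's.

#include <stdbool.h>
#include <stdio.h>

// stand-in for mimalloc's mi_bbin_t; the real enum lives in the bitmap header
typedef enum { BBIN_NONE = 0, BBIN_SMALL, BBIN_MEDIUM, BBIN_LARGE } bbin_t;

// a chunk is eligible if it already belongs to the requested bin, or, on the
// final pass (bin == bbin), if no bin has claimed it yet
static bool chunk_matches_bin(bbin_t bin, bbin_t bbin, bbin_t chunk_bin) {
  return (chunk_bin == bin) || (bin == bbin && chunk_bin == BBIN_NONE);
}

int main(void) {
  printf("%d\n", chunk_matches_bin(BBIN_SMALL, BBIN_LARGE, BBIN_SMALL)); // 1: exact bin match
  printf("%d\n", chunk_matches_bin(BBIN_SMALL, BBIN_LARGE, BBIN_NONE));  // 0: unclaimed chunks wait for the final pass
  printf("%d\n", chunk_matches_bin(BBIN_LARGE, BBIN_LARGE, BBIN_NONE));  // 1: final pass may take unclaimed chunks
  return 0;
}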


@@ -157,7 +157,7 @@ static mi_decl_cache_align mi_tld_t tld_main = {
mi_decl_cache_align mi_heap_t heap_main = {
&tld_main, // thread local data
NULL, // exclusive arena
0, // preferred numa node
0, // preferred numa node
0, // initial cookie
//{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0, true }, // random
@@ -690,15 +690,6 @@ void mi_process_init(void) mi_attr_noexcept {
// the following two can potentially allocate (on freeBSD for locks and thread keys)
mi_subproc_main_init();
mi_process_setup_auto_thread_done();
#if MI_DEBUG
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif
_mi_verbose_message("secure level: %d\n", MI_SECURE);
_mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL);
#if MI_TSAN
_mi_verbose_message("thread santizer enabled\n");
#endif
mi_thread_init();
#if defined(_WIN32) && defined(MI_WIN_USE_FLS)


@@ -175,7 +175,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(max_vabits) }, // max virtual address space bits
{ MI_DEFAULT_PAGEMAP_COMMIT,
UNINIT, MI_OPTION(pagemap_commit) }, // commit the full pagemap upfront?
{ 0, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
{ 1, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
{ 16, UNINIT, MI_OPTION(page_reclaim_max) }, // don't reclaim pages if we already own N pages (in that size class)
};
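
The default for page_commit_on_demand flips from 0 to 1 here. Assuming the enum value follows the usual mi_option_<name> pattern produced by the MI_OPTION macro, the setting can still be overridden at runtime; the value 2 is taken from the comment above (disable on-demand commit only on overcommit systems such as Linux). A hedged usage sketch:

#include <mimalloc.h>

int main(void) {
  // option name assumed from MI_OPTION(page_commit_on_demand); adjust if the enum differs
  mi_option_set(mi_option_page_commit_on_demand, 2);
  void* p = mi_malloc(64);   // subsequent page commits follow the chosen policy
  mi_free(p);
  return 0;
}

Setting MIMALLOC_PAGE_COMMIT_ON_DEMAND=2 in the environment should have the same effect, following mimalloc's usual MIMALLOC_<OPTION> naming for options.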


@@ -694,18 +694,19 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
static _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count
static _Atomic(int) _mi_numa_node_count; // = 0 // cache the node count
size_t _mi_os_numa_node_count(void) {
int _mi_os_numa_node_count(void) {
size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
if mi_unlikely(count <= 0) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount > 0) {
count = (size_t)ncount;
if (ncount > 0 && ncount < INT_MAX) {
count = (int)ncount;
}
else {
count = _mi_prim_numa_node_count(); // or detect dynamically
if (count == 0) { count = 1; }
const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
if (n == 0 || n > INT_MAX) { count = 1; }
else { count = (int)n; }
}
mi_atomic_store_release(&_mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count);
@@ -715,12 +716,13 @@ size_t _mi_os_numa_node_count(void) {
static int mi_os_numa_node_get(void) {
size_t numa_count = _mi_os_numa_node_count();
int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
size_t numa_node = _mi_prim_numa_node();
const size_t n = _mi_prim_numa_node();
int numa_node = (n < INT_MAX ? (int)n : 0);
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return (int)numa_node;
return numa_node;
}
int _mi_os_numa_node(void) {
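
mi_os_numa_node_get now clamps the raw node index from the primitive layer into int range and wraps it into [0, numa_count) with a modulo, so a thread reporting node 5 on a system whose count was capped at 2 ends up using node 1. A tiny sketch of that normalization with made-up inputs; the real code gets its values from _mi_prim_numa_node and _mi_os_numa_node_count.

#include <limits.h>
#include <stdio.h>

// normalize a raw node index into [0, numa_count), as the code above does
static int normalize_numa_node(size_t raw_node, int numa_count) {
  if (numa_count <= 1) { return 0; }                        // single-node fast path: always node 0
  int node = (raw_node < INT_MAX ? (int)raw_node : 0);      // clamp into int range
  if (node >= numa_count) { node = node % numa_count; }     // wrap into range
  return node;
}

int main(void) {
  printf("%d\n", normalize_numa_node(5, 2));  // 1: wrapped by modulo
  printf("%d\n", normalize_numa_node(1, 1));  // 0: single node
  printf("%d\n", normalize_numa_node(3, 8));  // 3: already in range
  return 0;
}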