Merge branch 'dev3' of /Volumes/T9/dev/mimalloc into dev3

Daan 2025-03-05 10:00:59 -08:00
commit 38f59405e4
7 changed files with 29 additions and 29 deletions


@@ -160,7 +160,7 @@ bool _mi_os_secure_guard_page_reset_at(void* addr);
bool _mi_os_secure_guard_page_reset_before(void* addr);
int _mi_os_numa_node(void);
size_t _mi_os_numa_node_count(void);
int _mi_os_numa_node_count(void);
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid);
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid);


@@ -343,10 +343,10 @@ typedef struct mi_page_s {
// The max object sizes are checked to not waste more than 12.5% internally over the page sizes.
#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < ~8 KiB
#if MI_ENABLE_LARGE_PAGES
#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 64 KiB
#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < ~64 KiB
#define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/8) // <= 512KiB // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
#else
#define MI_MEDIUM_MAX_OBJ_SIZE (MI_MEDIUM_PAGE_SIZE/4) // <= 128 KiB
#define MI_MEDIUM_MAX_OBJ_SIZE (MI_MEDIUM_PAGE_SIZE/8) // <= 64 KiB
#define MI_LARGE_MAX_OBJ_SIZE MI_MEDIUM_MAX_OBJ_SIZE // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
#endif
#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)
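
As a quick check of the thresholds above, the following stand-alone sketch spells out the arithmetic, assuming the page sizes implied by the comments (64 KiB small, 512 KiB medium, 4 MiB large pages) and ignoring MI_PAGE_INFO_SIZE; the real constants live in the surrounding header and may differ.

#include <stdio.h>

// Hypothetical stand-ins for the real constants; the actual values come from mimalloc's headers.
#define PAGE_SMALL   (64u * 1024)         // assumed MI_SMALL_PAGE_SIZE
#define PAGE_MEDIUM  (512u * 1024)        // assumed MI_MEDIUM_PAGE_SIZE
#define PAGE_LARGE   (4u * 1024 * 1024)   // assumed MI_LARGE_PAGE_SIZE

int main(void) {
  // dividing by 8 keeps internal waste below 1/8 = 12.5% of a page
  printf("small  max obj ~ %u KiB\n", (PAGE_SMALL  / 8) / 1024);   // ~8 KiB
  printf("medium max obj ~ %u KiB\n", (PAGE_MEDIUM / 8) / 1024);   // ~64 KiB
  printf("large  max obj   %u KiB\n", (PAGE_LARGE  / 8) / 1024);   // 512 KiB
  // without MI_ENABLE_LARGE_PAGES the old /4 divisor allowed ~128 KiB medium objects;
  // the new /8 divisor brings that back to ~64 KiB for the same 12.5% waste bound
  printf("old medium cap    %u KiB\n", (PAGE_MEDIUM / 4) / 1024);  // ~128 KiB
  return 0;
}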


@@ -1524,17 +1524,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
if (pages == 0) return 0;
// pages per numa node
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) numa_count = 1;
int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) { numa_count = 1; }
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
// reserve evenly among numa nodes
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
if (numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
if ((size_t)numa_node < pages_mod) { node_pages++; }
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (err) return err;
if (pages < node_pages) {
pages = 0;
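
The loop above spreads `pages` over the NUMA nodes: every node gets pages / numa_count, and the first pages % numa_count nodes take one extra page. Below is a minimal sketch of just that distribution with made-up inputs; the call to mi_reserve_huge_os_pages_at is replaced by a printf.

#include <stdio.h>
#include <stddef.h>

int main(void) {
  size_t pages = 10;      // example: 10 huge pages requested
  int numa_count = 4;     // example: 4 NUMA nodes
  const size_t pages_per = pages / (size_t)numa_count;   // 2
  const size_t pages_mod = pages % (size_t)numa_count;   // 2
  for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
    size_t node_pages = pages_per;                        // can be 0
    if ((size_t)numa_node < pages_mod) { node_pages++; }  // first `pages_mod` nodes get one extra
    printf("node %d: reserve %zu page(s)\n", numa_node, node_pages);  // prints 3, 3, 2, 2
    pages = (pages < node_pages ? 0 : pages - node_pages);
  }
  return 0;
}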


@@ -1560,11 +1560,18 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]);
const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
size_t eidx = 0;
mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y) // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
mi_bfield_cycle_iterate(cmap_entry, tseq, cmap_entry_cycle, eidx, Y)
{
mi_assert_internal(eidx <= MI_BFIELD_BITS);
// don't search into non-accessed memory until we tried other size bins as well
if (bin < bbin && eidx >= cmap_entry_cycle) break;
// get the chunk idx
const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
mi_assert_internal(bin >= bbin || chunk_idx <= chunk_acc);
// only in the current size class!
const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
if ((mi_bbin_t)bin == chunk_bin || (bin == bbin && chunk_bin == MI_BBIN_NONE)) // only allow NONE at the final run
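
The condition at the end decides whether a chunk may serve the current search: either the chunk is already tagged with the requested size bin, or this is the final pass (bin == bbin) and the chunk is still untagged (MI_BBIN_NONE), matching the "only allow NONE at the final run" comment. A self-contained sketch of that predicate with a stand-in enum; the names below are illustrative, not the library's.

#include <stdbool.h>
#include <stdio.h>

// stand-in for mimalloc's mi_bbin_t; the real enum lives in the bitmap header
typedef enum { BBIN_NONE = 0, BBIN_SMALL, BBIN_MEDIUM, BBIN_LARGE } bbin_t;

// a chunk is eligible if it already belongs to the requested bin, or, on the
// final pass (bin == bbin), if no bin has claimed it yet
static bool chunk_matches_bin(bbin_t bin, bbin_t bbin, bbin_t chunk_bin) {
  return (chunk_bin == bin) || (bin == bbin && chunk_bin == BBIN_NONE);
}

int main(void) {
  printf("%d\n", chunk_matches_bin(BBIN_SMALL, BBIN_LARGE, BBIN_SMALL)); // 1: exact bin match
  printf("%d\n", chunk_matches_bin(BBIN_SMALL, BBIN_LARGE, BBIN_NONE));  // 0: unclaimed chunks wait for the final pass
  printf("%d\n", chunk_matches_bin(BBIN_LARGE, BBIN_LARGE, BBIN_NONE));  // 1: final pass may take unclaimed chunks
  return 0;
}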


@@ -157,7 +157,7 @@ static mi_decl_cache_align mi_tld_t tld_main = {
mi_decl_cache_align mi_heap_t heap_main = {
&tld_main, // thread local data
NULL, // exclusive arena
0, // preferred numa node
0, // preferred numa node
0, // initial cookie
//{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0, true }, // random
@@ -690,15 +690,6 @@ void mi_process_init(void) mi_attr_noexcept {
// the following two can potentially allocate (on freeBSD for locks and thread keys)
mi_subproc_main_init();
mi_process_setup_auto_thread_done();
#if MI_DEBUG
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif
_mi_verbose_message("secure level: %d\n", MI_SECURE);
_mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL);
#if MI_TSAN
_mi_verbose_message("thread santizer enabled\n");
#endif
mi_thread_init();
#if defined(_WIN32) && defined(MI_WIN_USE_FLS)


@@ -175,7 +175,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(max_vabits) }, // max virtual address space bits
{ MI_DEFAULT_PAGEMAP_COMMIT,
UNINIT, MI_OPTION(pagemap_commit) }, // commit the full pagemap upfront?
{ 0, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
{ 1, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
{ 16, UNINIT, MI_OPTION(page_reclaim_max) }, // don't reclaim pages if we already own N pages (in that size class)
};
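
The default for page_commit_on_demand flips from 0 to 1 here. Assuming the enum value follows the usual mi_option_<name> pattern produced by the MI_OPTION macro, the setting can still be overridden at runtime; the value 2 is taken from the comment above (disable on-demand commit only on overcommit systems such as Linux). A hedged usage sketch:

#include <mimalloc.h>

int main(void) {
  // option name assumed from MI_OPTION(page_commit_on_demand); adjust if the enum differs
  mi_option_set(mi_option_page_commit_on_demand, 2);
  void* p = mi_malloc(64);   // subsequent page commits follow the chosen policy
  mi_free(p);
  return 0;
}

Setting MIMALLOC_PAGE_COMMIT_ON_DEMAND=2 in the environment should have the same effect, following mimalloc's usual MIMALLOC_<OPTION> naming for options.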


@@ -694,18 +694,19 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
static _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count
static _Atomic(int) _mi_numa_node_count; // = 0 // cache the node count
size_t _mi_os_numa_node_count(void) {
int _mi_os_numa_node_count(void) {
size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
if mi_unlikely(count <= 0) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount > 0) {
count = (size_t)ncount;
if (ncount > 0 && ncount < INT_MAX) {
count = (int)ncount;
}
else {
count = _mi_prim_numa_node_count(); // or detect dynamically
if (count == 0) { count = 1; }
const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
if (n == 0 || n > INT_MAX) { count = 1; }
else { count = (int)n; }
}
mi_atomic_store_release(&_mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count);
@@ -715,12 +716,13 @@ size_t _mi_os_numa_node_count(void) {
static int mi_os_numa_node_get(void) {
size_t numa_count = _mi_os_numa_node_count();
int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
size_t numa_node = _mi_prim_numa_node();
const size_t n = _mi_prim_numa_node();
int numa_node = (n < INT_MAX ? (int)n : 0);
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return (int)numa_node;
return numa_node;
}
int _mi_os_numa_node(void) {
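
mi_os_numa_node_get now clamps the raw node index from the primitive layer into int range and wraps it into [0, numa_count) with a modulo, so a thread reporting node 5 on a system whose count was capped at 2 ends up using node 1. A tiny sketch of that normalization with made-up inputs; the real code gets its values from _mi_prim_numa_node and _mi_os_numa_node_count.

#include <limits.h>
#include <stdio.h>

// normalize a raw node index into [0, numa_count), as the code above does
static int normalize_numa_node(size_t raw_node, int numa_count) {
  if (numa_count <= 1) { return 0; }                        // single-node fast path: always node 0
  int node = (raw_node < INT_MAX ? (int)raw_node : 0);      // clamp into int range
  if (node >= numa_count) { node = node % numa_count; }     // wrap into range
  return node;
}

int main(void) {
  printf("%d\n", normalize_numa_node(5, 2));  // 1: wrapped by modulo
  printf("%d\n", normalize_numa_node(1, 1));  // 0: single node
  printf("%d\n", normalize_numa_node(3, 8));  // 3: already in range
  return 0;
}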