From d01ed42bcb755ed6c1b52bfd8a306821da098dd5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 13 Nov 2019 13:35:50 -0800 Subject: [PATCH] replace max_numa_nodes by use_numa_nodes (to help with wrong detection of numa nodes on WSL for example) --- include/mimalloc-internal.h | 8 +++--- include/mimalloc.h | 4 +-- src/arena.c | 15 +++++------ src/init.c | 2 +- src/options.c | 4 +-- src/os.c | 54 +++++++++++++++++-------------------- 6 files changed, 40 insertions(+), 47 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 668a7bd3..77045a99 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -436,15 +436,15 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // Optimize numa node access for the common case (= one node) // ------------------------------------------------------------------- -int _mi_os_numa_node_get(mi_os_tld_t* tld); -int _mi_os_numa_node_count_get(void); +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); -extern int _mi_numa_node_count; +extern size_t _mi_numa_node_count; static inline int _mi_os_numa_node(mi_os_tld_t* tld) { if (mi_likely(_mi_numa_node_count == 1)) return 0; else return _mi_os_numa_node_get(tld); } -static inline int _mi_os_numa_node_count(void) { +static inline size_t _mi_os_numa_node_count(void) { if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; else return _mi_os_numa_node_count_get(); } diff --git a/include/mimalloc.h b/include/mimalloc.h index 8d029135..3c942849 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,7 +230,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated @@ -274,8 +274,8 @@ typedef enum mi_option_e { mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_reset_delay, + mi_option_use_numa_nodes, mi_option_os_tag, - mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/arena.c b/src/arena.c index 02890bd6..46741208 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,7 +42,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_sec void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- Arena allocation @@ -317,22 +316,22 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } -// reserve huge pages evenly among all numa nodes. -int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { +// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes - for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -349,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/init.c b/src/init.c index 473e9a32..72543b95 100644 --- a/src/init.c +++ b/src/init.c @@ -469,7 +469,7 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); } } diff --git a/src/options.c b/src/options.c index bbea4e67..180f6a75 100644 --- a/src/options.c +++ b/src/options.c @@ -69,9 +69,9 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 93fb8b31..2415a40d 100644 --- a/src/os.c +++ b/src/os.c @@ -968,66 +968,61 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 -static int mi_os_numa_nodex() { +static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); GetNumaProcessorNodeEx(&pnum,&numa_node); - return (int)numa_node; + return numa_node; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { ULONG numa_max = 0; GetNumaHighestNodeNumber(&numa_max); - return (int)(numa_max + 1); + return (numa_max + 1); } #elif defined(__linux__) #include // getcpu #include // access -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { #ifdef SYS_getcpu - unsigned node = 0; - unsigned ncpu = 0; - int err = syscall(SYS_getcpu, &ncpu, &node, NULL); + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; - return (int)node; + return node; #else return 0; #endif } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { char buf[128]; - int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) - int node = 0; - for(node = 0; node < max_nodes; node++) { + unsigned node = 0; + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); if (access(buf,R_OK) != 0) break; } return (node+1); } #else -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { return 0; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { return 1; } #endif -int _mi_numa_node_count = 0; // cache the node count +size_t _mi_numa_node_count = 0; // cache the node count -int _mi_os_numa_node_count_get(void) { +size_t _mi_os_numa_node_count_get(void) { if (mi_unlikely(_mi_numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); - int ncount0 = ncount; - // never more than max numa node and at least 1 - int nmax = (int)mi_option_get(mi_option_max_numa_nodes); - if (ncount > nmax) ncount = nmax; - if (ncount <= 0) ncount = 1; - _mi_numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); + long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? + if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically + _mi_numa_node_count = (size_t)(ncount <= 0 ? 1 : ncount); + _mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count); } mi_assert_internal(_mi_numa_node_count >= 1); return _mi_numa_node_count; @@ -1035,11 +1030,10 @@ int _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - int numa_node = mi_os_numa_nodex(); + size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; - return numa_node; + return (int)numa_node; }