replace max_numa_nodes with use_numa_nodes (to help with wrong detection of NUMA nodes on WSL, for example)

Daan Leijen 2019-11-13 13:35:50 -08:00
parent bdb8274819
commit d01ed42bcb
6 changed files with 40 additions and 47 deletions
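Note (illustrative, not part of the commit): with use_numa_nodes the node count is set explicitly (0 = auto-detect) rather than capped at a maximum, so a client on WSL can simply pin it to one node. A minimal sketch, assuming the public mimalloc.h options API (mi_option_set) and mimalloc's usual MIMALLOC_<OPTION> environment naming:

#include <mimalloc.h>

int main(void) {
  // Force mimalloc to treat the machine as a single NUMA node,
  // working around bogus node detection (e.g. on WSL).
  // Equivalent to setting MIMALLOC_USE_NUMA_NODES=1 in the environment.
  mi_option_set(mi_option_use_numa_nodes, 1);

  void* p = mi_malloc(64);  // allocations now ignore the detected node count
  mi_free(p);
  return 0;
}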

include/mimalloc-internal.h

@@ -436,15 +436,15 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c
 // Optimize numa node access for the common case (= one node)
 // -------------------------------------------------------------------
 int    _mi_os_numa_node_get(mi_os_tld_t* tld);
-int    _mi_os_numa_node_count_get(void);
+size_t _mi_os_numa_node_count_get(void);
-extern int _mi_numa_node_count;
+extern size_t _mi_numa_node_count;
 static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
   if (mi_likely(_mi_numa_node_count == 1)) return 0;
   else return _mi_os_numa_node_get(tld);
 }
-static inline int _mi_os_numa_node_count(void) {
+static inline size_t _mi_os_numa_node_count(void) {
   if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count;
   else return _mi_os_numa_node_count_get();
 }

include/mimalloc.h

@@ -230,7 +230,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
 mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
 mi_decl_export bool mi_is_redirected() mi_attr_noexcept;
-mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept;
+mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
 mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
 // deprecated
@@ -274,8 +274,8 @@ typedef enum mi_option_e {
   mi_option_segment_reset,
   mi_option_eager_commit_delay,
   mi_option_reset_delay,
+  mi_option_use_numa_nodes,
   mi_option_os_tag,
-  mi_option_max_numa_nodes,
   mi_option_max_errors,
   _mi_option_last
 } mi_option_t;
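Usage note (illustrative, not part of the diff): the new second parameter selects how many nodes to interleave over, with 0 falling back to detection. A hedged sketch of a call against the updated declaration:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Reserve 8 huge OS pages spread over the detected NUMA nodes;
  // pass a positive numa_nodes value to override detection.
  int err = mi_reserve_huge_os_pages_interleave(8 /*pages*/, 0 /*numa_nodes: auto*/, 8*500 /*timeout_msecs*/);
  if (err != 0) fprintf(stderr, "huge page reservation failed (err=%d)\n", err);
  return 0;
}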

src/arena.c

@@ -42,7 +42,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_sec
 void  _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
 bool  _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
-int   _mi_os_numa_node_count(void);
 /* -----------------------------------------------------------
   Arena allocation
@@ -317,22 +316,22 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
 }
-// reserve huge pages evenly among all numa nodes.
-int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept {
+// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)
+int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept {
   if (pages == 0) return 0;
   // pages per numa node
-  int numa_count = _mi_os_numa_node_count();
+  size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
   if (numa_count <= 0) numa_count = 1;
   const size_t pages_per = pages / numa_count;
   const size_t pages_mod = pages % numa_count;
   const size_t timeout_per = (timeout_msecs / numa_count) + 50;
   // reserve evenly among numa nodes
-  for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
+  for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
     size_t node_pages = pages_per; // can be 0
-    if ((size_t)numa_node < pages_mod) node_pages++;
-    int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
+    if (numa_node < pages_mod) node_pages++;
+    int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
     if (err) return err;
     if (pages < node_pages) {
       pages = 0;
@@ -349,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
   UNUSED(max_secs);
   _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
   if (pages_reserved != NULL) *pages_reserved = 0;
-  int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0));
+  int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
   if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
   return err;
 }
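For reference, the distribution arithmetic above hands each node pages/numa_count pages and gives the first pages % numa_count nodes one extra page. A small standalone sketch of just that arithmetic (hypothetical demo, not in the patch): reserving 10 pages over 4 nodes yields 3, 3, 2, 2.

#include <stdio.h>

int main(void) {
  size_t pages = 10, numa_count = 4;
  const size_t pages_per = pages / numa_count;   // 2
  const size_t pages_mod = pages % numa_count;   // 2
  for (size_t node = 0; node < numa_count; node++) {
    size_t node_pages = pages_per;
    if (node < pages_mod) node_pages++;          // first nodes absorb the remainder
    printf("node %zu: %zu pages\n", node, node_pages);
  }
  return 0;
}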

src/init.c

@@ -469,7 +469,7 @@ void mi_process_init(void) mi_attr_noexcept {
   if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
     size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
-    mi_reserve_huge_os_pages_interleave(pages, pages*500);
+    mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
   }
 }

src/options.c

@@ -69,9 +69,9 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(segment_reset) },      // reset segment memory on free (needs eager commit)
   { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
   { 500, UNINIT, MI_OPTION(reset_delay) },      // reset delay in milli-seconds
+  { 0, UNINIT, MI_OPTION(use_numa_nodes) },     // 0 = use available numa nodes, otherwise use at most N nodes.
   { 100, UNINIT, MI_OPTION(os_tag) },           // only apple specific for now but might serve more or less related purpose
-  { 256, UNINIT, MI_OPTION(max_numa_nodes) },   // use at most N numa nodes
   { 16, UNINIT, MI_OPTION(max_errors) }         // maximum errors that are output
 };
 static void mi_option_init(mi_option_desc_t* desc);

src/os.c

@@ -968,66 +968,61 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
 Support NUMA aware allocation
 -----------------------------------------------------------------------------*/
 #ifdef WIN32
-static int mi_os_numa_nodex() {
+static size_t mi_os_numa_nodex() {
   PROCESSOR_NUMBER pnum;
   USHORT numa_node = 0;
   GetCurrentProcessorNumberEx(&pnum);
   GetNumaProcessorNodeEx(&pnum,&numa_node);
-  return (int)numa_node;
+  return numa_node;
 }
-static int mi_os_numa_node_countx(void) {
+static size_t mi_os_numa_node_countx(void) {
   ULONG numa_max = 0;
   GetNumaHighestNodeNumber(&numa_max);
-  return (int)(numa_max + 1);
+  return (numa_max + 1);
 }
 #elif defined(__linux__)
 #include <sys/syscall.h>  // getcpu
 #include <stdio.h>        // access
-static int mi_os_numa_nodex(void) {
+static size_t mi_os_numa_nodex(void) {
 #ifdef SYS_getcpu
-  unsigned node = 0;
-  unsigned ncpu = 0;
-  int err = syscall(SYS_getcpu, &ncpu, &node, NULL);
+  unsigned long node = 0;
+  unsigned long ncpu = 0;
+  long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
   if (err != 0) return 0;
-  return (int)node;
+  return node;
 #else
   return 0;
 #endif
 }
-static int mi_os_numa_node_countx(void) {
+static size_t mi_os_numa_node_countx(void) {
   char buf[128];
-  int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness)
-  int node = 0;
-  for(node = 0; node < max_nodes; node++) {
+  unsigned node = 0;
+  for(node = 0; node < 256; node++) {
     // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation)
-    snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1);
+    snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1);
     if (access(buf,R_OK) != 0) break;
   }
   return (node+1);
 }
 #else
-static int mi_os_numa_nodex(void) {
+static size_t mi_os_numa_nodex(void) {
   return 0;
 }
-static int mi_os_numa_node_countx(void) {
+static size_t mi_os_numa_node_countx(void) {
   return 1;
 }
 #endif
-int _mi_numa_node_count = 0;   // cache the node count
+size_t _mi_numa_node_count = 0;   // cache the node count
-int _mi_os_numa_node_count_get(void) {
+size_t _mi_os_numa_node_count_get(void) {
   if (mi_unlikely(_mi_numa_node_count <= 0)) {
-    int ncount = mi_os_numa_node_countx();
-    int ncount0 = ncount;
-    // never more than max numa node and at least 1
-    int nmax = (int)mi_option_get(mi_option_max_numa_nodes);
-    if (ncount > nmax) ncount = nmax;
-    if (ncount <= 0) ncount = 1;
-    _mi_numa_node_count = ncount;
-    _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0);
+    long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
+    if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically
+    _mi_numa_node_count = (size_t)(ncount <= 0 ? 1 : ncount);
+    _mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count);
   }
   mi_assert_internal(_mi_numa_node_count >= 1);
   return _mi_numa_node_count;
@@ -1035,11 +1030,10 @@ int _mi_os_numa_node_count_get(void) {
 int _mi_os_numa_node_get(mi_os_tld_t* tld) {
   UNUSED(tld);
-  int numa_count = _mi_os_numa_node_count();
+  size_t numa_count = _mi_os_numa_node_count();
   if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
   // never more than the node count and >= 0
-  int numa_node = mi_os_numa_nodex();
+  size_t numa_node = mi_os_numa_nodex();
   if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
-  if (numa_node < 0) numa_node = 0;
-  return numa_node;
+  return (int)numa_node;
 }
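The Linux node count is now probed up to a fixed bound of 256 sysfs entries instead of being driven by the removed max_numa_nodes option. A standalone sketch of the same probing idea (hypothetical test program, not part of the patch; it assumes the /sys/devices/system/node layout the code above relies on, and includes <unistd.h> for access):

#include <stdio.h>
#include <unistd.h>   // access

// Count NUMA nodes by probing /sys/devices/system/node/node<N>,
// mirroring mi_os_numa_node_countx: stop at the first missing entry.
static unsigned numa_node_count(void) {
  char buf[128];
  unsigned node = 0;
  for (node = 0; node < 256; node++) {
    snprintf(buf, sizeof(buf), "/sys/devices/system/node/node%u", node + 1);
    if (access(buf, R_OK) != 0) break;
  }
  return node + 1;  // node0 always exists, so the count is at least 1
}

int main(void) {
  printf("detected %u numa node(s)\n", numa_node_count());
  return 0;
}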