improve and document numa support

daan 2019-11-02 10:30:16 -07:00
parent 2c12d7f223
commit a69016c33e
2 changed files with 30 additions and 11 deletions


@@ -854,8 +854,11 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) {
   void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
   if (p == NULL) return NULL;
 #ifdef MI_HAS_NUMA
-  if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) {
+  if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
     uintptr_t numa_mask = (1UL << numa_node);
+    // TODO: does `mbind` work correctly for huge OS pages? should we
+    // use `set_mempolicy` before calling mmap instead?
+    // see: <https://lkml.org/lkml/2017/2/9/875>
     long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
     if (err != 0) {
       _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno));
@@ -883,6 +886,9 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) {
   return p;
 }
 
+/* ----------------------------------------------------------------------------
+Support NUMA aware allocation
+-----------------------------------------------------------------------------*/
 #ifdef WIN32
 static int mi_os_numa_nodex() {
   PROCESSOR_NUMBER pnum;
@@ -902,6 +908,9 @@ static int mi_os_numa_node_countx(void) {
 #include <stdlib.h>
 #include <numaif.h>
 static int mi_os_numa_nodex(void) {
+#define MI_NUMA_NODE_SLOW // too slow, so cache it
+  // TODO: perhaps use RDTSCP instruction on x64?
+  // see <https://stackoverflow.com/questions/16862620/numa-get-current-node-core>
   #define MI_MAX_MASK (4)  // support at most 256 nodes
   unsigned long mask[MI_MAX_MASK];
   memset(mask,0,MI_MAX_MASK*sizeof(long));
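
The RDTSCP idea in the TODO above is one way to learn the current node without the relatively slow mempolicy-mask probe; a simpler Linux-only variant is the getcpu system call, which reports the current cpu and node in one call. A rough sketch, not part of this commit, with an illustrative function name:

#include <unistd.h>
#include <sys/syscall.h>

// Sketch: query the current NUMA node via getcpu instead of the mempolicy mask probe.
static int numa_node_via_getcpu(void) {
  unsigned cpu = 0;
  unsigned node = 0;
  if (syscall(SYS_getcpu, &cpu, &node, NULL) != 0) return 0;  // fall back to node 0
  return (int)node;
}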
@@ -945,7 +954,7 @@ static int mi_os_numa_node_countx(void) {
 #endif
 
 int _mi_os_numa_node_count(void) {
-  static int numa_node_count = 0;
+  static int numa_node_count = 0; // cache the node count
   if (mi_unlikely(numa_node_count <= 0)) {
     int ncount = mi_os_numa_node_countx();
     // never more than max numa node and at least 1
@@ -959,14 +968,24 @@ int _mi_os_numa_node_count(void) {
 }
 
 int _mi_os_numa_node(mi_os_tld_t* tld) {
+  int numa_node;
+#ifndef MI_NUMA_NODE_SLOW
+  UNUSED(tld);
+  numa_node = mi_os_numa_nodex();
+#else
   if (mi_unlikely(tld->numa_node < 0)) {
-    int nnode = mi_os_numa_nodex();
-    // never more than the node count
-    int ncount = _mi_os_numa_node_count();
-    if (nnode >= ncount) { nnode = nnode % ncount; }
-    if (nnode < 0) nnode = 0;
-    tld->numa_node = nnode;
+    // Cache the NUMA node of the thread if the call is slow.
+    // This may not be correct as threads can migrate to another cpu on
+    // another node -- however, for memory allocation this just means we keep
+    // using the same 'node id' for its allocations; new OS allocations
+    // naturally come from the actual node so in practice this may be fine.
+    tld->numa_node = mi_os_numa_nodex();
   }
-  mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count());
-  return tld->numa_node;
+  numa_node = tld->numa_node;
+#endif
+  // never more than the node count and >= 0
+  int numa_count = _mi_os_numa_node_count();
+  if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
+  if (numa_node < 0) numa_node = 0;
+  return numa_node;
 }


@@ -24,7 +24,7 @@ public:
 
 int main() {
-  //mi_stats_reset(); // ignore earlier allocations
+  mi_stats_reset(); // ignore earlier allocations
   atexit(free_p);
   void* p1 = malloc(78);
   void* p2 = mi_malloc_aligned(16,24);