Merge branch 'dev3' into dev3-binx

This commit is contained in:
daanx 2025-03-05 20:47:05 -08:00
commit dd3a74d89d
5 changed files with 55 additions and 59 deletions

View file

@ -1525,17 +1525,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
if (pages == 0) return 0;
// pages per numa node
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) numa_count = 1;
int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) { numa_count = 1; }
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
// reserve evenly among numa nodes
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
if (numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
if ((size_t)numa_node < pages_mod) { node_pages++; }
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (err) return err;
if (pages < node_pages) {
pages = 0;

View file

@ -663,7 +663,7 @@ static void mi_detect_cpu_features(void) {
int32_t cpu_info[4];
__cpuid(cpu_info, 7);
_mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
_mi_cpu_has_erms = ((cpu_info[2] & (1 << 9)) != 0); // bit 9 of ECX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
_mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
}
#else
static void mi_detect_cpu_features(void) {

View file

@ -694,18 +694,19 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
static _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count
static _Atomic(int) _mi_numa_node_count; // = 0 // cache the node count
size_t _mi_os_numa_node_count(void) {
size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
int _mi_os_numa_node_count(void) {
int count = mi_atomic_load_acquire(&_mi_numa_node_count);
if mi_unlikely(count <= 0) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount > 0) {
count = (size_t)ncount;
if (ncount > 0 && ncount < INT_MAX) {
count = (int)ncount;
}
else {
count = _mi_prim_numa_node_count(); // or detect dynamically
if (count == 0) { count = 1; }
const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
if (n == 0 || n > INT_MAX) { count = 1; }
else { count = (int)n; }
}
mi_atomic_store_release(&_mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count);
@ -715,12 +716,13 @@ size_t _mi_os_numa_node_count(void) {
// Return a NUMA node index as an `int` that is always >= 0 and below the
// node count reported by `_mi_os_numa_node_count()`.
//
// NOTE(review): this span looks like a rendered diff hunk whose +/- markers
// were stripped, so pre-change and post-change lines appear side by side.
// Presumably the `size_t` declarations and `return (int)numa_node;` are the
// old lines and the `int` declarations and `return numa_node;` are their
// replacements -- confirm against the actual commit before relying on this.
static int mi_os_numa_node_get(void) {
// node count (presumably the pre-change `size_t` form -- TODO confirm)
size_t numa_count = _mi_os_numa_node_count();
// node count as `int` (presumably the post-change form -- TODO confirm)
int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
// raw node value from the primitive layer (presumably the pre-change form)
size_t numa_node = _mi_prim_numa_node();
// raw node value from the primitive layer, kept as `size_t` until range-checked
const size_t n = _mi_prim_numa_node();
// narrow to `int` only when the value is below INT_MAX; otherwise use node 0
int numa_node = (n < INT_MAX ? (int)n : 0);
// wrap an out-of-range node back into [0, numa_count)
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
// presumably the pre-change return (cast was needed while numa_node was size_t)
return (int)numa_node;
// presumably the post-change return (numa_node is already an int)
return numa_node;
}
int _mi_os_numa_node(void) {