mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-20 14:09:32 +03:00
Use much faster getcpu() via vDSO
vDSO (virtual dynamic shared object) is exported by Linux kernel into every userspace program, designed to speed up this process for certain system calls. For Linux/x86_64, getcpu() can be called via vDSO, which makes getcpu() much faster. The faster getcpu() invocation is beneficial when retrieving NUMA node information. Benchmarking[1] on AMD Ryzen Threadripper 2990WX 32-Core Processor: getcpu: syscall: 103 nsec/call getcpu: vdso: 18 nsec/call We can not use dlsym to resolve the vDSO symbol "__vdso_getcpu" directly becase it would cause recursive malloc calls when MI_DEBUG_FULL is enabled. [1] https://github.com/nathanlynch/vdsotest Co-authored-by: Chin-Hao Lo <hankluo6@gmail.com> Signed-off-by: Jim Huang <jserv@biilabs.io>
This commit is contained in:
parent
076f815cec
commit
6cd59aa50c
1 changed files with 87 additions and 3 deletions
90
src/os.c
90
src/os.c
|
@ -214,6 +214,81 @@ void _mi_os_init() {
|
|||
os_alloc_granularity = 16;
|
||||
}
|
||||
#else
|
||||
#if defined(__linux__) && defined(__x86_64__)
|
||||
#include <elf.h>
|
||||
#include <sys/auxv.h>
|
||||
typedef int (*getcpu_vdso_t)(unsigned*, unsigned*, void*);
|
||||
static getcpu_vdso_t mi_os_getcpu_vdso = NULL;
|
||||
static struct vdso_info {
|
||||
uintptr_t load_addr, load_offset;
|
||||
Elf64_Sym *symtab; // ELF symbol table
|
||||
const char *symstrings;
|
||||
void *bucket, *chain;
|
||||
Elf64_Word nbucket;
|
||||
} vdso_info;
|
||||
static void mi_os_vdso_init(void) {
|
||||
unsigned long base = getauxval(AT_SYSINFO_EHDR);
|
||||
if (!base)
|
||||
return;
|
||||
bool found_vaddr = false;
|
||||
vdso_info.load_addr = base;
|
||||
Elf64_Ehdr* hdr = (Elf64_Ehdr*)base;
|
||||
if (hdr->e_ident[EI_CLASS] != ELFCLASS64)
|
||||
return;
|
||||
Elf64_Phdr* pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff);
|
||||
Elf64_Dyn* dyn = 0;
|
||||
// we need to load offset and the dynamic table from the segment table
|
||||
for (size_t i = 0; i < hdr->e_phnum; i++) {
|
||||
if (pt[i].p_type == PT_LOAD && !found_vaddr) {
|
||||
found_vaddr = true;
|
||||
vdso_info.load_offset = base + (uintptr_t)pt[i].p_offset - (uintptr_t)pt[i].p_vaddr;
|
||||
} else if (pt[i].p_type == PT_DYNAMIC) {
|
||||
dyn = (Elf64_Dyn*)(base + pt[i].p_offset);
|
||||
}
|
||||
}
|
||||
if (!found_vaddr || !dyn)
|
||||
return;
|
||||
Elf64_Word* hash = 0;
|
||||
vdso_info.symstrings = NULL;
|
||||
vdso_info.symtab = NULL;
|
||||
for (size_t i = 0; dyn[i].d_tag != DT_NULL; i++) {
|
||||
switch (dyn[i].d_tag) {
|
||||
case DT_STRTAB:
|
||||
vdso_info.symstrings = (const char*)((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset);
|
||||
break;
|
||||
case DT_SYMTAB:
|
||||
vdso_info.symtab = (Elf64_Sym*)((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset);
|
||||
break;
|
||||
case DT_HASH:
|
||||
hash = (Elf64_Word*)((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
|
||||
return;
|
||||
vdso_info.nbucket = hash[0];
|
||||
vdso_info.bucket = &hash[2];
|
||||
vdso_info.chain = &hash[vdso_info.nbucket + 2];
|
||||
}
|
||||
static void* mi_os_vdso_get_sym(void) {
|
||||
const char *name = "__vdso_getcpu";
|
||||
Elf64_Word chain = ((Elf64_Word*)vdso_info.bucket)[11538501 % vdso_info.nbucket];
|
||||
for (; chain != STN_UNDEF; chain = ((Elf64_Word*)vdso_info.chain)[chain]) {
|
||||
Elf64_Sym* sym = &vdso_info.symtab[chain];
|
||||
// Check for a defined global or weak function with right name
|
||||
if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
|
||||
continue;
|
||||
if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ELF64_ST_BIND(sym->st_info) != STB_WEAK)
|
||||
continue;
|
||||
if (sym->st_shndx == SHN_UNDEF)
|
||||
continue;
|
||||
if (strcmp(name, vdso_info.symstrings + sym->st_name))
|
||||
continue;
|
||||
return (void*)(vdso_info.load_offset + sym->st_value);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
void _mi_os_init() {
|
||||
// get the page size
|
||||
long result = sysconf(_SC_PAGESIZE);
|
||||
|
@ -222,6 +297,11 @@ void _mi_os_init() {
|
|||
os_alloc_granularity = os_page_size;
|
||||
}
|
||||
large_os_page_size = 2*MiB; // TODO: can we query the OS for this?
|
||||
#if defined(__linux__) && defined(__x86_64__)
|
||||
// set up symbols exported by vDSO (virtual dynamic shared object)
|
||||
mi_os_vdso_init();
|
||||
mi_os_getcpu_vdso = (getcpu_vdso_t)mi_os_vdso_get_sym();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1173,9 +1253,13 @@ static size_t mi_os_numa_node_countx(void) {
|
|||
#include <stdio.h> // access
|
||||
|
||||
static size_t mi_os_numa_nodex(void) {
|
||||
#ifdef SYS_getcpu
|
||||
unsigned long node = 0;
|
||||
unsigned long ncpu = 0;
|
||||
#if defined(SYS_getcpu)
|
||||
unsigned int node = 0, ncpu = 0;
|
||||
#if defined(__x86_64__)
|
||||
if (mi_likely(mi_os_getcpu_vdso != NULL)) {
|
||||
return (mi_os_getcpu_vdso(&ncpu, &node, NULL) != -1) ? node : 0;
|
||||
}
|
||||
#endif
|
||||
long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
|
||||
if (err != 0) return 0;
|
||||
return node;
|
||||
|
|
Loading…
Add table
Reference in a new issue