From 6cd59aa50c2ce727f8a2e02492ad125eb856998d Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Fri, 25 Jun 2021 22:41:44 +0800
Subject: [PATCH] Use much faster getcpu() via vDSO

vDSO (virtual dynamic shared object) is exported by the Linux kernel into
every userspace program, designed to speed up this process for certain
system calls. For Linux/x86_64, getcpu() can be called via vDSO, which
makes getcpu() much faster. The faster getcpu() invocation is beneficial
when retrieving NUMA node information.

Benchmarking[1] on AMD Ryzen Threadripper 2990WX 32-Core Processor:
  getcpu: syscall: 103 nsec/call
  getcpu: vdso: 18 nsec/call

We cannot use dlsym to resolve the vDSO symbol "__vdso_getcpu" directly
because it would cause recursive malloc calls when MI_DEBUG_FULL is
enabled.

[1] https://github.com/nathanlynch/vdsotest

Co-authored-by: Chin-Hao Lo
Signed-off-by: Jim Huang
---
Reviewer notes:
 - mi_os_vdso_get_sym() now returns NULL when the hash table was never set
   up (no vDSO, or no DT_HASH entry) instead of evaluating `11538501 % 0`.
 - The header names of the `#include` lines were missing from the copy under
   review. <elf.h> and <sys/auxv.h> are what the identifiers used require;
   the `// access` context line is assumed to be <stdio.h> -- confirm
   against src/os.c before applying.

 src/os.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/src/os.c b/src/os.c
index 85415232..3d9ff30b 100644
--- a/src/os.c
+++ b/src/os.c
@@ -214,6 +214,89 @@ void _mi_os_init() {
   os_alloc_granularity = 16;
 }
 #else
+#if defined(__linux__) && defined(__x86_64__)
+#include <elf.h>
+#include <sys/auxv.h>
+typedef int (*getcpu_vdso_t)(unsigned*, unsigned*, void*);
+static getcpu_vdso_t mi_os_getcpu_vdso = NULL;
+// Symbol lookup tables of the vDSO image; `nbucket` stays 0 until mi_os_vdso_init() finds them.
+static struct vdso_info {
+  uintptr_t load_addr, load_offset;
+  Elf64_Sym *symtab;  // ELF symbol table
+  const char *symstrings;
+  void *bucket, *chain;
+  Elf64_Word nbucket;
+} vdso_info;
+// Locate the vDSO via the auxiliary vector and record its symbol, string and hash tables.
+static void mi_os_vdso_init(void) {
+  unsigned long base = getauxval(AT_SYSINFO_EHDR);
+  if (!base)
+    return;
+  bool found_vaddr = false;
+  vdso_info.load_addr = base;
+  Elf64_Ehdr* hdr = (Elf64_Ehdr*)base;
+  if (hdr->e_ident[EI_CLASS] != ELFCLASS64)
+    return;
+  Elf64_Phdr* pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff);
+  Elf64_Dyn* dyn = 0;
+  // we need to load offset and the dynamic table from the segment table
+  for (size_t i = 0; i < hdr->e_phnum; i++) {
+    if (pt[i].p_type == PT_LOAD && !found_vaddr) {
+      found_vaddr = true;
+      vdso_info.load_offset = base + (uintptr_t)pt[i].p_offset - (uintptr_t)pt[i].p_vaddr;
+    } else if (pt[i].p_type == PT_DYNAMIC) {
+      dyn = (Elf64_Dyn*)(base + pt[i].p_offset);
+    }
+  }
+  if (!found_vaddr || !dyn)
+    return;
+  Elf64_Word* hash = 0;
+  vdso_info.symstrings = NULL;
+  vdso_info.symtab = NULL;
+  for (size_t i = 0; dyn[i].d_tag != DT_NULL; i++) {
+    switch (dyn[i].d_tag) {
+    case DT_STRTAB:
+      vdso_info.symstrings = (const char*)((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset);
+      break;
+    case DT_SYMTAB:
+      vdso_info.symtab = (Elf64_Sym*)((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset);
+      break;
+    case DT_HASH:
+      hash = (Elf64_Word*)((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset);
+      break;
+    }
+  }
+  if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
+    return;
+  vdso_info.nbucket = hash[0];
+  vdso_info.bucket = &hash[2];
+  vdso_info.chain = &hash[vdso_info.nbucket + 2];
+}
+// Look up "__vdso_getcpu" in the recorded hash table; returns NULL when it cannot be resolved.
+static void* mi_os_vdso_get_sym(void) {
+  const char *name = "__vdso_getcpu";
+  // nbucket is still 0 when mi_os_vdso_init() returned early (no vDSO, or no
+  // DT_HASH table); bail out instead of dividing by zero below
+  if (vdso_info.nbucket == 0)
+    return NULL;
+  // 11538501 is the SysV ELF hash of "__vdso_getcpu"
+  Elf64_Word chain = ((Elf64_Word*)vdso_info.bucket)[11538501 % vdso_info.nbucket];
+  for (; chain != STN_UNDEF; chain = ((Elf64_Word*)vdso_info.chain)[chain]) {
+    Elf64_Sym* sym = &vdso_info.symtab[chain];
+    // Check for a defined global or weak function with right name
+    if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+      continue;
+    if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+      continue;
+    if (sym->st_shndx == SHN_UNDEF)
+      continue;
+    if (strcmp(name, vdso_info.symstrings + sym->st_name))
+      continue;
+    return (void*)(vdso_info.load_offset + sym->st_value);
+  }
+  return 0;
+}
+#endif
 void _mi_os_init() {
   // get the page size
   long result = sysconf(_SC_PAGESIZE);
@@ -222,6 +305,11 @@ void _mi_os_init() {
     os_alloc_granularity = os_page_size;
   }
   large_os_page_size = 2*MiB; // TODO: can we query the OS for this?
+#if defined(__linux__) && defined(__x86_64__)
+  // set up symbols exported by vDSO (virtual dynamic shared object)
+  mi_os_vdso_init();
+  mi_os_getcpu_vdso = (getcpu_vdso_t)mi_os_vdso_get_sym();
+#endif
 }
 #endif
 
@@ -1173,9 +1261,13 @@ static size_t mi_os_numa_node_countx(void) {
 #include <stdio.h>        // access
 
 static size_t mi_os_numa_nodex(void) {
-#ifdef SYS_getcpu
-  unsigned long node = 0;
-  unsigned long ncpu = 0;
+#if defined(SYS_getcpu)
+  unsigned int node = 0, ncpu = 0;
+#if defined(__x86_64__)
+  if (mi_likely(mi_os_getcpu_vdso != NULL)) {
+    return (mi_os_getcpu_vdso(&ncpu, &node, NULL) != -1) ? node : 0;
+  }
+#endif
   long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
   if (err != 0) return 0;
   return node;