initial numa support for arenas

daan 2019-11-01 19:53:07 -07:00
parent aaf01620f4
commit a6499be074
6 changed files with 241 additions and 153 deletions


@@ -25,8 +25,10 @@ with on-demand coalescing.
 // os.c
 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
-int   _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept;
+//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept;
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize);
+int   _mi_os_numa_node_count(void);
 
 /* -----------------------------------------------------------
   Arena allocation
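The new OS-layer API replaces the timed huge-page allocator with a NUMA-node-aware one, plus a query for the number of nodes. The os.c side is not shown in this excerpt; purely as an illustration, here is a minimal sketch of how a node count could be probed on Linux via sysfs (the function name and the sysfs method are assumptions, not this commit's code):

#include <stdio.h>
#include <unistd.h>

// Hypothetical sketch: count NUMA nodes by probing the sysfs directories
// /sys/devices/system/node/node0, node1, ... (Linux-specific assumption).
static int example_numa_node_count(void) {
  int node = 0;
  char path[64];
  while (node < 256) {  // arbitrary safety bound
    snprintf(path, sizeof(path), "/sys/devices/system/node/node%d", node);
    if (access(path, F_OK) != 0) break;  // no such node directory: stop
    node++;
  }
  return (node == 0 ? 1 : node);  // treat missing sysfs as a single node
}

int main(void) {
  printf("numa nodes: %d\n", example_numa_node_count());
  return 0;
}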
@@ -44,6 +46,7 @@ typedef uintptr_t mi_block_info_t;
 typedef struct mi_arena_s {
   uint8_t* start;                    // the start of the memory area
   size_t   block_count;              // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
+  int      numa_node;                // associated NUMA node
   bool     is_zero_init;             // is the arena zero initialized?
   bool     is_large;                 // large OS page allocated
   _Atomic(uintptr_t) block_bottom;   // optimization to start the search for free blocks
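The new `numa_node` field drives arena selection below: a negative value means the arena is not tied to a specific node, so anyone may allocate from it (see the TODO in `mi_reserve_huge_os_pages_at`). A hedged restatement of the eligibility check the allocation loops apply, using a stand-in struct for illustration:

#include <stdbool.h>

// Stand-in for the two fields consulted here (not the real mi_arena_t).
typedef struct example_arena_s {
  int  numa_node;  // associated NUMA node, or -1 for "any node"
  bool is_large;   // backed by large OS pages
} example_arena_t;

// An arena qualifies if it has no specific node or matches the current
// node; large-page arenas are only eligible when the caller allows them.
static bool example_arena_suitable(const example_arena_t* arena,
                                   int current_node, bool allow_large) {
  bool numa_ok  = (arena->numa_node < 0 || arena->numa_node == current_node);
  bool large_ok = (allow_large || !arena->is_large);
  return numa_ok && large_ok;
}

int main(void) {
  example_arena_t a = { -1, false };  // node-agnostic, small pages
  return example_arena_suitable(&a, 0, false) ? 0 : 1;  // suitable on any node
}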
@@ -223,7 +226,31 @@ static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_ze
   Arena Allocation
 ----------------------------------------------------------- */
 
-void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) {
+static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
+                                 bool* commit, bool* large, bool* is_zero,
+                                 size_t* memid)
+{
+  size_t block_index = SIZE_MAX;
+  void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index);
+  if (p != NULL) {
+    mi_assert_internal(block_index != SIZE_MAX);
+    #if MI_DEBUG>=1
+    _Atomic(mi_block_info_t)* block = &arena->blocks[block_index];
+    mi_block_info_t binfo = mi_atomic_read(block);
+    mi_assert_internal(mi_block_is_in_use(binfo));
+    mi_assert_internal(mi_block_count(binfo) >= needed_bcount);
+    #endif
+    *memid  = mi_memid_create(arena_index, block_index);
+    *commit = true; // TODO: support commit on demand?
+    *large  = arena->is_large;
+  }
+  return p;
+}
+
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
+                              bool* commit, bool* large, bool* is_zero,
+                              size_t* memid, mi_os_tld_t* tld)
+{
   mi_assert_internal(memid != NULL && tld != NULL);
   mi_assert_internal(size > 0);
   *memid = MI_MEMID_OS;
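The factored-out `mi_arena_alloc_from` records where a block came from by packing the arena index and the block index into a single `memid`, with `MI_MEMID_OS` reserved for plain OS allocations (as set in the fallback path in the next hunk). The encoding itself is not shown in this diff; a plausible sketch, offered purely as an assumption about how `mi_memid_create` might work:

#include <stddef.h>

// Hypothetical memid packing (the real mi_memid_create may differ):
// arena index in the low byte, block index above it. Adding 1 to the
// arena index keeps the value 0 free for MI_MEMID_OS.
#define EXAMPLE_MEMID_OS ((size_t)0)

static size_t example_memid_create(size_t arena_index, size_t block_index) {
  return (block_index << 8) | ((arena_index + 1) & 0xFF);
}

static void example_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) {
  *arena_index = (memid & 0xFF) - 1;
  *block_index = (memid >> 8);
}

int main(void) {
  size_t a, b;
  example_memid_indices(example_memid_create(3, 42), &a, &b);
  return (a == 3 && b == 42) ? 0 : 1;  // round-trips
}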
@@ -240,33 +267,36 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool*
   {
     size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
     size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
+    int numa_node = _mi_os_numa_node(); // current numa node
     mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
+    // try numa affine allocation
     for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
       mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i]));
-      if (arena==NULL) break;
-      if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages
-        size_t block_index = SIZE_MAX;
-        void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index);
-        if (p != NULL) {
-          mi_assert_internal(block_index != SIZE_MAX);
-          #if MI_DEBUG>=1
-          _Atomic(mi_block_info_t)* block = &arena->blocks[block_index];
-          mi_block_info_t binfo = mi_atomic_read(block);
-          mi_assert_internal(mi_block_is_in_use(binfo));
-          mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size);
-          #endif
-          *memid = mi_memid_create(i, block_index);
-          *commit = true; // TODO: support commit on demand?
-          *large = arena->is_large;
-          mi_assert_internal((uintptr_t)p % alignment == 0);
-          return p;
-        }
+      if (arena==NULL) break; // end reached
+      if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
+          (*large || !arena->is_large))                          // large OS pages allowed, or arena is not large OS pages
+      {
+        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid);
+        mi_assert_internal((uintptr_t)p % alignment == 0);
+        if (p != NULL) return p;
+      }
+    }
+    // try from another numa node instead..
+    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
+      mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i]));
+      if (arena==NULL) break; // end reached
+      if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
+          (*large || !arena->is_large))                           // large OS pages allowed, or arena is not large OS pages
+      {
+        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid);
+        mi_assert_internal((uintptr_t)p % alignment == 0);
+        if (p != NULL) return p;
       }
     }
   }
 
-  // fall back to the OS
+  // finally, fall back to the OS
   *is_zero = true;
   *memid = MI_MEMID_OS;
   return _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
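The rewritten `_mi_arena_alloc_aligned` now searches in two passes: first arenas that are NUMA-local (or node-agnostic), then arenas on other nodes, and only then the OS. A toy model of that ordering, applying the same eligibility rule sketched after the struct above (an illustration, not mimalloc code):

#include <stdbool.h>
#include <stdio.h>

typedef struct toy_arena_s { int numa_node; bool is_large; } toy_arena_t;

// Return the index of the first eligible arena, preferring NUMA-local ones;
// -1 means the caller should fall back to the OS.
static int toy_pick_arena(const toy_arena_t* arenas, int count,
                          int current_node, bool allow_large) {
  for (int pass = 0; pass < 2; pass++) {
    for (int i = 0; i < count; i++) {
      bool local = (arenas[i].numa_node < 0 || arenas[i].numa_node == current_node);
      if ((pass == 0) != local) continue;           // pass 0 wants local, pass 1 remote
      if (!allow_large && arenas[i].is_large) continue;
      return i;
    }
  }
  return -1;
}

int main(void) {
  toy_arena_t arenas[] = { {1, true}, {0, true}, {-1, false} };
  // On node 0, the arena at index 1 (node 0) wins over index 0 (node 1).
  printf("picked arena %d\n", toy_pick_arena(arenas, 3, 0, true));
  return 0;
}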
@@ -350,31 +380,61 @@ static bool mi_arena_add(mi_arena_t* arena) {
 ----------------------------------------------------------- */
 #include <errno.h> // ENOMEM
 
-int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
-  size_t pages_reserved_default = 0;
-  if (pages_reserved==NULL) pages_reserved = &pages_reserved_default;
+// reserve at a specific numa node
+static int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept {
   size_t hsize = 0;
-  void* p = NULL;
-  int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize);
-  _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved);
-  if (p==NULL) return err;
-  // err might be != 0 but that is fine, we just got less pages.
-  mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages);
+  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize);
+  if (p==NULL) return ENOMEM;
+  _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages);
+
   size_t bcount = hsize / MI_ARENA_BLOCK_SIZE;
   size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much
-  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main);
+  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
   if (arena == NULL) {
-    *pages_reserved = 0;
     _mi_os_free(p, hsize, &_mi_stats_main);
     return ENOMEM;
   }
   arena->block_count = bcount;
   arena->start = (uint8_t*)p;
   arena->block_bottom = 0;
+  arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
   arena->is_large = true;
   arena->is_zero_init = true;
   memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t));
   //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false));
   mi_arena_add(arena);
   return 0;
 }
+
+// reserve huge pages evenly among all numa nodes.
+int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept {
+  if (pages == 0) return 0;
+  // pages per numa node
+  int numa_count = _mi_os_numa_node_count();
+  if (numa_count <= 0) numa_count = 1;
+  size_t pages_per = pages / numa_count;
+  if (pages_per == 0) pages_per = 1;
+  // reserve evenly among numa nodes
+  for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
+    int err = mi_reserve_huge_os_pages_at((pages_per > pages ? pages : pages_per), numa_node);
+    if (err) return err;
+    if (pages < pages_per) {
+      pages = 0;
+    }
+    else {
+      pages -= pages_per;
+    }
+  }
+  return 0;
+}
+
+int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
+  _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
+  if (pages_reserved != NULL) *pages_reserved = 0;
+  int err = mi_reserve_huge_os_pages_interleave(pages);
+  if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
+  return err;
+}
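The interleaved reservation divides `pages` by the node count and hands out at most `pages_per` pages per node. Note that any remainder is simply not reserved in this version: for example, 10 pages over 4 nodes gives `pages_per == 2`, so 2 pages go to each of nodes 0 through 3 and 2 pages are left over. A standalone restatement of that arithmetic:

#include <stddef.h>
#include <stdio.h>

// Re-statement of the distribution loop above for pages=10, numa_count=4.
int main(void) {
  size_t pages = 10;
  int numa_count = 4;
  size_t pages_per = pages / numa_count;      // 2
  if (pages_per == 0) pages_per = 1;
  for (int node = 0; node < numa_count && pages > 0; node++) {
    size_t n = (pages_per > pages ? pages : pages_per);
    printf("node %d: reserve %zu page(s)\n", node, n);
    pages = (pages < pages_per ? 0 : pages - pages_per);
  }
  printf("pages left unreserved: %zu\n", pages);  // prints 2
  return 0;
}

Callers are expected to move to the new entry point, as the deprecation message says. A minimal use, assuming the declaration is exported through mimalloc.h:

#include <mimalloc.h>

int reserve_example(void) {
  // reserve 8 huge (1GiB) pages spread across the available NUMA nodes
  return mi_reserve_huge_os_pages_interleave(8);
}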