mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-06 15:29:31 +03:00)

commit c1cbe71836 (parent 30dfe97f5b)

    add numa-affine allocation, and per-heap numa affinity

8 changed files with 75 additions and 63 deletions
@@ -266,7 +266,7 @@ typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_
 mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
 
-// Experimental
+// Advanced
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
 mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept;
 
@@ -279,7 +279,7 @@ mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_commi
 mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
 mi_decl_export void mi_arenas_print(void) mi_attr_noexcept;
 
-// Experimental: heaps associated with specific memory arena's
+// Advanced: heaps associated with specific memory arena's
 typedef void* mi_arena_id_t;
 mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
 mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;

@@ -292,7 +292,7 @@ mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t a
 #endif
 
 
-// Experimental: allow sub-processes whose memory areas stay separated (and no reclamation between them)
+// Advanced: allow sub-processes whose memory areas stay separated (and no reclamation between them)
 // Used for example for separate interpreters in one process.
 typedef void* mi_subproc_id_t;
 mi_decl_export mi_subproc_id_t mi_subproc_main(void);

@@ -300,10 +300,15 @@ mi_decl_export mi_subproc_id_t mi_subproc_new(void);
 mi_decl_export void mi_subproc_delete(mi_subproc_id_t subproc);
 mi_decl_export void mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet)
 
-// Experimental: visit abandoned heap areas (that are not owned by a specific heap)
+// Advanced: visit abandoned heap areas (that are not owned by a specific heap)
 mi_decl_export bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
 
+// Experimental: set numa-affinity of a heap
+mi_decl_export void mi_heap_set_numa_affinity(mi_heap_t* heap, int numa_node);
+
 // Experimental: objects followed by a guard page.
+// Setting the sample rate on a specific heap can be used to test parts of the program more
+// specifically (in combination with `mi_heap_set_default`).
 // A sample rate of 0 disables guarded objects, while 1 uses a guard page for every object.
 // A seed of 0 uses a random start point. Only objects within the size bound are eligable for guard pages.
 mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed);

@@ -324,13 +329,6 @@ mi_decl_export void mi_collect_reduce(size_t target_thread_owned) mi_attr_noexce
 
 
 // experimental
-//mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size);
-//mi_decl_export void* mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, void** base, size_t* full_size);
-//mi_decl_export void* mi_os_alloc_aligned_allow_large(size_t size, size_t alignment, bool commit, bool* is_committed, bool* is_pinned, void** base, size_t* full_size);
-//mi_decl_export void mi_os_free(void* p, size_t size);
-//mi_decl_export void mi_os_commit(void* p, size_t size);
-//mi_decl_export void mi_os_decommit(void* p, size_t size);
-
 mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size);
 mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id);
 mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena);
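A minimal usage sketch of the new per-heap affinity call; node id 1 is just an example, and the setting is a preference rather than a hard binding (allocation falls back to arenas on other nodes when the preferred node has no free space):

#include <mimalloc.h>

int main(void) {
  mi_heap_t* heap = mi_heap_new();
  mi_heap_set_numa_affinity(heap, 1);       // prefer arenas on NUMA node 1 (example node id)
  void* p = mi_heap_malloc(heap, 64 * 1024); // served from a node-1 arena when possible
  mi_free(p);
  mi_heap_delete(heap);
  return 0;
}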
@@ -159,6 +159,8 @@ bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned);
 bool _mi_os_secure_guard_page_reset_at(void* addr);
 bool _mi_os_secure_guard_page_reset_before(void* addr);
 
+int    _mi_os_numa_node(void);
+size_t _mi_os_numa_node_count(void);
 
 void*  _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid);
 void*  _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid);

@@ -174,8 +176,8 @@ mi_arena_id_t _mi_arena_id_none(void);
 mi_arena_t* _mi_arena_from_id(mi_arena_id_t id);
 bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena);
 
-void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid);
-void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid);
+void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid);
+void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid);
 void  _mi_arenas_free(void* p, size_t size, mi_memid_t memid);
 bool  _mi_arenas_contain(const void* p);
 void  _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld);

@@ -1026,24 +1028,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
   return x;
 }
 
-// -------------------------------------------------------------------
-// Optimize numa node access for the common case (= one node)
-// -------------------------------------------------------------------
-
-int    _mi_os_numa_node_get(void);
-size_t _mi_os_numa_node_count_get(void);
-
-extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count;
-static inline int _mi_os_numa_node(void) {
-  if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; }
-  else return _mi_os_numa_node_get();
-}
-static inline size_t _mi_os_numa_node_count(void) {
-  const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
-  if mi_likely(count > 0) { return count; }
-  else return _mi_os_numa_node_count_get();
-}
-
 
 // ---------------------------------------------------------------------------------
 // Provide our own `_mi_memcpy` for potential performance optimizations.
@@ -424,6 +424,7 @@ typedef struct mi_padding_s {
 struct mi_heap_s {
   mi_tld_t*             tld;              // thread-local data
   mi_arena_t*           exclusive_arena;  // if the heap should only allocate from a specific arena (or NULL)
+  int                   numa_node;        // preferred numa node (or -1 for no preference)
   uintptr_t             cookie;           // random cookie to verify pointers (see `_mi_ptr_cookie`)
   mi_random_ctx_t       random;           // random number context used for secure allocation
   size_t                page_count;       // total number of pages in the `pages` queues.

@@ -485,6 +486,7 @@ typedef int64_t mi_msecs_t;
 struct mi_tld_s {
   mi_threadid_t         thread_id;        // thread id of this thread
   size_t                thread_seq;       // thread sequence id (linear count of created threads)
+  int                   numa_node;        // thread preferred numa node
   mi_subproc_t*         subproc;          // sub-process this thread belongs to.
   mi_heap_t*            heap_backing;     // backing heap of this thread (cannot be deleted)
   mi_heap_t*            heaps;            // list of heaps in this thread (so we can abandon all when the thread terminates)
@@ -64,11 +64,11 @@ static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) {
 // allocate a fresh meta page and add it to the global list.
 static mi_meta_page_t* mi_meta_page_zalloc(void) {
   // allocate a fresh arena slice
-  // note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again..
+  // note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again.. (same with _mi_os_numa_node()...)
   mi_memid_t memid;
   uint8_t* base = (uint8_t*)_mi_arenas_alloc_aligned(_mi_subproc(), MI_META_PAGE_SIZE, MI_META_PAGE_ALIGN, 0,
                                                      true /* commit*/, (MI_SECURE==0) /* allow large? */,
-                                                     NULL /* req arena */, 0 /* thread_seq */, &memid);
+                                                     NULL /* req arena */, 0 /* thread_seq */, -1 /* numa node */, &memid);
   if (base == NULL) return NULL;
   mi_assert_internal(_mi_is_aligned(base,MI_META_PAGE_ALIGN));
   if (!memid.initially_zero) {
src/arena.c (51 changes)
@@ -335,12 +335,13 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_
   Arena iteration
 ----------------------------------------------------------- */
 
-static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_pinned) {
+static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, bool match_numa, int numa_node, bool allow_pinned) {
   if (!allow_pinned && arena->memid.is_pinned) return false;
   if (!mi_arena_id_is_suitable(arena, req_arena)) return false;
   if (req_arena == NULL) { // if not specific, check numa affinity
     const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
-    if (!numa_suitable) return false;
+    if (match_numa) { if (!numa_suitable) return false; }
+    else { if (numa_suitable) return false; }
   }
   return true;
 }
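The new `match_numa` flag lets the same predicate drive two search passes: one restricted to arenas on the preferred node (or with no node assigned), and one over exactly the arenas the first pass skipped. A self-contained sketch of that predicate using simplified stand-in types (not mimalloc's own):

#include <stdbool.h>
#include <stdio.h>

// Hypothetical stand-in for the arena's numa field.
typedef struct { int numa_node; } arena_t;

// Mirrors the numa part of mi_arena_is_suitable: the matching pass keeps arenas
// on the preferred node (or without a node); the non-matching pass keeps the rest.
static bool numa_ok(const arena_t* a, bool match_numa, int numa_node) {
  const bool suitable = (numa_node < 0 || a->numa_node < 0 || a->numa_node == numa_node);
  return match_numa ? suitable : !suitable;
}

int main(void) {
  arena_t arenas[] = { { .numa_node = 0 }, { .numa_node = 1 }, { .numa_node = -1 } };
  const int preferred = 1;
  for (int pass = 0; pass < 2; pass++) {
    const bool match = (pass == 0);
    for (int i = 0; i < 3; i++) {
      printf("pass %d arena %d: %s\n", pass, i, numa_ok(&arenas[i], match, preferred) ? "try" : "skip");
    }
  }
  return 0;
}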
@@ -375,9 +376,9 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena
   } \
   }
 
-#define mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, name_arena) \
+#define mi_forall_suitable_arenas(subproc, req_arena, tseq, match_numa, numa_node, allow_large, name_arena) \
   mi_forall_arenas(subproc, req_arena,tseq,name_arena) { \
-    if (mi_arena_is_suitable(name_arena, req_arena, -1 /* todo: numa node */, allow_large)) { \
+    if (mi_arena_is_suitable(name_arena, req_arena, match_numa, numa_node, allow_large)) { \
 
 #define mi_forall_suitable_arenas_end() \
   }} \
@@ -390,14 +391,23 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena
 // allocate slices from the arenas
 static mi_decl_noinline void* mi_arenas_try_find_free(
   mi_subproc_t* subproc, size_t slice_count, size_t alignment,
-  bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid)
+  bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid)
 {
   mi_assert_internal(slice_count <= mi_slice_count_of_size(MI_ARENA_MAX_OBJ_SIZE));
   mi_assert(alignment <= MI_ARENA_SLICE_ALIGN);
   if (alignment > MI_ARENA_SLICE_ALIGN) return NULL;
 
   // search arena's
-  mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena)
+  mi_forall_suitable_arenas(subproc, req_arena, tseq, true /* only numa matching */, numa_node, allow_large, arena)
+  {
+    void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid);
+    if (p != NULL) return p;
+  }
+  mi_forall_suitable_arenas_end();
+  if (numa_node < 0) return NULL;
+
+  // search again but now regardless of preferred numa affinity
+  mi_forall_suitable_arenas(subproc, req_arena, tseq, false /* numa non-matching now */, numa_node, allow_large, arena)
   {
     void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid);
     if (p != NULL) return p;
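Put together, `mi_arenas_try_find_free` now searches numa-matching arenas first and falls back to the remaining arenas only when that fails; a preference of -1 stops after the first pass since it already accepted every arena. A small standalone sketch of that control flow (hypothetical `arena_t` and `find_free`, not the mimalloc implementation):

#include <stdbool.h>
#include <stddef.h>

typedef struct { int numa_node; bool has_free_slice; } arena_t;  // hypothetical stand-in

static bool numa_ok(const arena_t* a, bool match_numa, int numa_node) {
  const bool suitable = (numa_node < 0 || a->numa_node < 0 || a->numa_node == numa_node);
  return match_numa ? suitable : !suitable;
}

// First pass: numa-matching arenas only; second pass: exactly the arenas skipped before.
static arena_t* find_free(arena_t* arenas, size_t n, int numa_node) {
  for (int match = 1; match >= 0; match--) {
    for (size_t i = 0; i < n; i++) {
      if (!numa_ok(&arenas[i], match != 0, numa_node)) continue;
      if (arenas[i].has_free_slice) return &arenas[i];
    }
    if (numa_node < 0) break;  // no preference: the first pass already covered all arenas
  }
  return NULL;
}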
@@ -411,14 +421,14 @@ static mi_decl_noinline void* mi_arenas_try_alloc(
   mi_subproc_t* subproc,
   size_t slice_count, size_t alignment,
   bool commit, bool allow_large,
-  mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid)
+  mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid)
 {
   mi_assert(slice_count <= MI_ARENA_MAX_OBJ_SLICES);
   mi_assert(alignment <= MI_ARENA_SLICE_ALIGN);
   void* p;
 
   // try to find free slices in the arena's
-  p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid);
+  p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid);
   if (p != NULL) return p;
 
   // did we need a specific arena?
@@ -441,7 +451,7 @@ static mi_decl_noinline void* mi_arenas_try_alloc(
   }
   // try once more to allocate in the new arena
   mi_assert_internal(req_arena == NULL);
-  p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid);
+  p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid);
   if (p != NULL) return p;
 
   return NULL;
@@ -472,21 +482,18 @@ static void* mi_arena_os_alloc_aligned(
 void* _mi_arenas_alloc_aligned( mi_subproc_t* subproc,
   size_t size, size_t alignment, size_t align_offset,
   bool commit, bool allow_large,
-  mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid)
+  mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid)
 {
   mi_assert_internal(memid != NULL);
   mi_assert_internal(size > 0);
 
-  // *memid = _mi_memid_none();
-  // const int numa_node = _mi_os_numa_node(&tld->os); // current numa node
-
   // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
   if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) &&  // is arena allocation allowed?
       size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE &&  // and not too small/large
       alignment <= MI_ARENA_SLICE_ALIGN && align_offset == 0)  // and good alignment
   {
     const size_t slice_count = mi_slice_count_of_size(size);
-    void* p = mi_arenas_try_alloc(subproc,slice_count, alignment, commit, allow_large, req_arena, tseq, memid);
+    void* p = mi_arenas_try_alloc(subproc,slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid);
     if (p != NULL) return p;
   }
 
@@ -495,9 +502,9 @@ void* _mi_arenas_alloc_aligned( mi_subproc_t* subproc,
   return p;
 }
 
-void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid)
+void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid)
 {
-  return _mi_arenas_alloc_aligned(subproc, size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena, tseq, memid);
+  return _mi_arenas_alloc_aligned(subproc, size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena, tseq, numa_node, memid);
 }
 
 
@@ -547,7 +554,9 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_
 
   // search arena's
   const bool allow_large = true;
-  mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena)
+  const int any_numa = -1;
+  const bool match_numa = true;
+  mi_forall_suitable_arenas(subproc, req_arena, tseq, match_numa, any_numa, allow_large, arena)
   {
     size_t slice_index;
     mi_bitmap_t* const bitmap = arena->pages_abandoned[bin];
@@ -582,7 +591,7 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_
 
 // Allocate a fresh page
 static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_count, size_t block_size, size_t block_alignment,
-                                             mi_arena_t* req_arena, size_t tseq, bool commit)
+                                             mi_arena_t* req_arena, size_t tseq, int numa_node, bool commit)
 {
   const bool allow_large = (MI_SECURE < 2); // 2 = guard page at end of each arena page
   const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN);
@@ -596,7 +605,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice
       !os_align &&                              // not large alignment
       slice_count <= MI_ARENA_MAX_OBJ_SLICES)   // and not too large
   {
-    page = (mi_page_t*)mi_arenas_try_alloc(subproc, slice_count, page_alignment, commit, allow_large, req_arena, tseq, &memid);
+    page = (mi_page_t*)mi_arenas_try_alloc(subproc, slice_count, page_alignment, commit, allow_large, req_arena, tseq, numa_node, &memid);
    if (page != NULL) {
      mi_assert_internal(mi_bitmap_is_clearN(memid.mem.arena.arena->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count));
      mi_bitmap_set(memid.mem.arena.arena->pages, memid.mem.arena.slice_index);
@@ -727,7 +736,7 @@ static mi_page_t* mi_arenas_page_regular_alloc(mi_heap_t* heap, size_t slice_cou
   const long commit_on_demand = mi_option_get(mi_option_page_commit_on_demand);
   const bool commit = (slice_count <= mi_slice_count_of_size(MI_PAGE_MIN_COMMIT_SIZE) ||  // always commit small pages
                        (commit_on_demand == 2 && _mi_os_has_overcommit()) || (commit_on_demand == 0));
-  page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq, commit);
+  page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq, heap->numa_node, commit);
   if (page != NULL) {
     mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count);
     _mi_page_init(heap, page);
@@ -749,7 +758,7 @@ static mi_page_t* mi_arenas_page_singleton_alloc(mi_heap_t* heap, size_t block_s
   const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, _mi_os_secure_guard_page_size()) + _mi_os_secure_guard_page_size());
   #endif
 
-  mi_page_t* page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq, true /* commit singletons always */);
+  mi_page_t* page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq, heap->numa_node, true /* commit singletons always */);
   if (page == NULL) return NULL;
 
   mi_assert(page->reserved == 1);
src/heap.c (10 changes)
@@ -183,11 +183,12 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool allow_destroy,
   _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
   heap->memid = memid;
   heap->tld = tld;  // avoid reading the thread-local tld during initialization
+  heap->tag = heap_tag;
+  heap->numa_node = tld->numa_node;
   heap->exclusive_arena = _mi_arena_from_id(arena_id);
   heap->allow_page_reclaim = (!allow_destroy && mi_option_get(mi_option_page_reclaim_on_free) >= 0);
   heap->allow_page_abandon = (!allow_destroy && mi_option_get(mi_option_page_full_retain) >= 0);
   heap->page_full_retain = mi_option_get_clamp(mi_option_page_full_retain, -1, 32);
-  heap->tag = heap_tag;
   if (heap->tld->is_in_threadpool) {
     // if we run as part of a thread pool it is better to not arbitrarily reclaim abandoned pages into our heap.
     // this is checked in `free.c:mi_free_try_collect_mt`
@@ -227,7 +228,7 @@ mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena
   else {
     // heaps associated wita a specific arena are allocated in that arena
     // note: takes up at least one slice which is quite wasteful...
-    heap = (mi_heap_t*)_mi_arenas_alloc(_mi_subproc(), _mi_align_up(sizeof(mi_heap_t),MI_ARENA_MIN_OBJ_SIZE), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, &memid);
+    heap = (mi_heap_t*)_mi_arenas_alloc(_mi_subproc(), _mi_align_up(sizeof(mi_heap_t),MI_ARENA_MIN_OBJ_SIZE), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, tld->numa_node, &memid);
   }
   if (heap==NULL) {
     _mi_error_message(ENOMEM, "unable to allocate heap meta-data\n");
@@ -261,6 +262,11 @@ uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
   return _mi_random_next(&heap->random);
 }
 
+void mi_heap_set_numa_affinity(mi_heap_t* heap, int numa_node) {
+  if (heap == NULL) return;
+  heap->numa_node = (numa_node < 0 ? -1 : numa_node % _mi_os_numa_node_count());
+}
+
 // zero out the page queues
 static void mi_heap_reset_pages(mi_heap_t* heap) {
   mi_assert_internal(heap != NULL);
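Negative node ids map to -1 (no preference) and non-negative ids are wrapped modulo the detected node count, so an out-of-range request still selects a valid node. A tiny standalone sketch of that clamping, assuming a node count of 2:

#include <stdio.h>

// Mirrors the clamping in mi_heap_set_numa_affinity (the node count of 2 is an assumption).
static int clamp_numa_node(int numa_node, int node_count) {
  return (numa_node < 0 ? -1 : numa_node % node_count);
}

int main(void) {
  printf("%d %d %d\n", clamp_numa_node(-5, 2), clamp_numa_node(1, 2), clamp_numa_node(3, 2));
  // prints: -1 1 1
  return 0;
}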
@@ -104,6 +104,7 @@ static mi_decl_cache_align mi_subproc_t subproc_main
 static mi_decl_cache_align mi_tld_t tld_empty = {
   0,                      // thread_id
   0,                      // thread_seq
+  0,                      // default numa node
   &subproc_main,          // subproc
   NULL,                   // heap_backing
   NULL,                   // heaps list
@@ -117,6 +118,7 @@ static mi_decl_cache_align mi_tld_t tld_empty = {
 mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   &tld_empty,             // tld
   NULL,                   // exclusive_arena
+  0,                      // preferred numa node
   0,                      // cookie
   //{ 0, 0 },             // keys
   { {0}, {0}, 0, true },  // random
@@ -141,6 +143,7 @@ extern mi_decl_hidden mi_decl_cache_align mi_heap_t heap_main;
 static mi_decl_cache_align mi_tld_t tld_main = {
   0,                      // thread_id
   0,                      // thread_seq
+  0,                      // numa node
   &subproc_main,          // subproc
   &heap_main,             // heap_backing
   &heap_main,             // heaps list
@@ -154,6 +157,7 @@ static mi_decl_cache_align mi_tld_t tld_main = {
 mi_decl_cache_align mi_heap_t heap_main = {
   &tld_main,              // thread local data
   NULL,                   // exclusive arena
+  0,                      // preferred numa node
   0,                      // initial cookie
   //{ 0, 0 },             // the key of the main heap can be fixed (unlike page keys that need to be secure!)
   { {0x846ca68b}, {0}, 0, true },  // random
@@ -306,6 +310,7 @@ static mi_tld_t* mi_tld_alloc(void) {
     tld->heap_backing = NULL;
     tld->heaps = NULL;
     tld->subproc = &subproc_main;
+    tld->numa_node = _mi_os_numa_node();
     tld->thread_id = _mi_prim_thread_id();
     tld->thread_seq = mi_atomic_add_acq_rel(&thread_total_count, 1);
     tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool();
src/os.c (18 changes)
@@ -694,18 +694,18 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
   Support NUMA aware allocation
 -----------------------------------------------------------------------------*/
 
-_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count
+static _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count
 
-size_t _mi_os_numa_node_count_get(void) {
+size_t _mi_os_numa_node_count(void) {
   size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
-  if (count <= 0) {
+  if mi_unlikely(count <= 0) {
     long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
     if (ncount > 0) {
       count = (size_t)ncount;
     }
     else {
       count = _mi_prim_numa_node_count(); // or detect dynamically
-      if (count == 0) count = 1;
+      if (count == 0) { count = 1; }
     }
     mi_atomic_store_release(&_mi_numa_node_count, count); // save it
     _mi_verbose_message("using %zd numa regions\n", count);
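`_mi_os_numa_node_count` detects the node count once and caches it in an atomic, so later calls are a single acquire load. A self-contained sketch of the same pattern with C11 atomics, where `detect_node_count` is a hypothetical stand-in for `_mi_prim_numa_node_count`:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

static _Atomic(size_t) node_count;  // 0 means "not detected yet"

// Hypothetical stand-in for the OS-specific probe.
static size_t detect_node_count(void) { return 2; }

static size_t numa_node_count(void) {
  size_t count = atomic_load_explicit(&node_count, memory_order_acquire);
  if (count == 0) {                     // first call: detect and cache
    count = detect_node_count();
    if (count == 0) { count = 1; }      // always report at least one node
    atomic_store_explicit(&node_count, count, memory_order_release);
  }
  return count;
}

int main(void) {
  printf("numa nodes: %zu\n", numa_node_count());
  return 0;
}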
@@ -713,7 +713,8 @@ size_t _mi_os_numa_node_count_get(void) {
   return count;
 }
 
-int _mi_os_numa_node_get(void) {
+static int mi_os_numa_node_get(void) {
   size_t numa_count = _mi_os_numa_node_count();
   if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
   // never more than the node count and >= 0
@@ -722,6 +723,13 @@ int _mi_os_numa_node_get(void) {
   return (int)numa_node;
 }
 
+int _mi_os_numa_node(void) {
+  if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; }
+  else return mi_os_numa_node_get();
+}
+
+
+
 
 /* ----------------------------------------------------------------------------
   Public API