diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 10528877..afa265f5 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -68,7 +68,7 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats)
 
 // arena.c
 void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
 void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
-void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
+void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats);
 
 // "segment.c"
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index cd9c5154..8203bc3b 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -108,7 +108,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_MEDIUM_OBJ_SIZE_MAX  (MI_MEDIUM_PAGE_SIZE/4)   // 128kb on 64-bit
 #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)   // 64kb on 64-bit
 
-#define MI_LARGE_OBJ_SIZE_MAX   (MI_SEGMENT_SIZE/4)   // 16mb on 64-bit
+#define MI_LARGE_OBJ_SIZE_MAX   (MI_SEGMENT_SIZE/2)   // 32mb on 64-bit
 #define MI_LARGE_OBJ_WSIZE_MAX  (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
 
 // Minimal alignment necessary. On most platforms 16 bytes are needed
diff --git a/src/arena.c b/src/arena.c
index 93655033..200e1ed7 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -52,9 +52,9 @@ int _mi_os_numa_node_count(void);
 // size in count of arena blocks.
 typedef uintptr_t mi_block_info_t;
 #define MI_SEGMENT_ALIGN      MI_SEGMENT_SIZE
-#define MI_ARENA_BLOCK_SIZE   (MI_SEGMENT_SIZE/2)                           // 32MiB
-#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE)  // 2GiB
-#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)                       // 16MiB
+#define MI_ARENA_BLOCK_SIZE   MI_SEGMENT_ALIGN                              // 64MiB
+#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE)  // 4GiB
+#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)                       // 32MiB
 #define MI_MAX_ARENAS         (64)  // not more than 256 (since we use 8 bits in the memid)
 
 // A memory arena descriptor
@@ -118,6 +118,98 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
 }
 
 
+/* -----------------------------------------------------------
+  Arena cache
+----------------------------------------------------------- */
+#define MI_CACHE_MAX (8)
+#define MI_MAX_NUMA  (64)
+
+#define MI_SLOT_IN_USE ((void*)1)
+
+typedef struct mi_cache_slot_s {
+  volatile _Atomic(void*) p;
+  volatile size_t         memid;
+  volatile bool           is_committed;
+  volatile bool           is_large;
+} mi_cache_slot_t;
+
+static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX];
+
+static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) {
+  // only segment blocks
+  if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL;
+
+  // set numa range
+  int numa_min = numa_node;
+  int numa_max = numa_min;
+  if (numa_node < 0) {
+    numa_min = 0;
+    numa_max = (_mi_os_numa_node_count()-1) % MI_MAX_NUMA;  // highest valid node index
+  }
+  else {
+    if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA;
+    numa_min = numa_max = numa_node;
+  }
+
+  // find a slot with a cached block
+  mi_cache_slot_t* slot;
+  for (int n = numa_min; n <= numa_max; n++) {
+    for (int i = 0; i < MI_CACHE_MAX; i++) {
+      slot = &cache[n][i];
+      void* p = mi_atomic_read_ptr_relaxed(&slot->p);
+      if (p > MI_SLOT_IN_USE) {  // not NULL or 1
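+        // try to claim the slot by CAS-ing its pointer to the MI_SLOT_IN_USE sentinel;
+        // while the sentinel is in place no other pop or push can touch this slot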
+        if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) {
+          // claimed
+          if (!*large && slot->is_large) {
+            // back out again: the caller did not allow large OS pages
+            mi_atomic_write_ptr(&slot->p, p); // make it available again
+          }
+          else {
+            // keep it
+            *memid = slot->memid;
+            *large = slot->is_large;
+            *is_zero = false;
+            bool committed = slot->is_committed;
+            mi_atomic_write_ptr(&slot->p, NULL); // set it free
+            if (*commit && !committed) {
+              bool commit_zero;
+              _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats);
+              committed = true;  // the block is committed from here on
+            }
+            *commit = committed;
+            return p;
+          }
+        }
+      }
+    }
+  }
+  return NULL;
+}
+
+static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large) {
+  // only for segment blocks
+  if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false;
+
+  // try to add it to the cache
+  int numa_node = _mi_os_numa_node(NULL);
+  if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA;  // `>=` as valid indices are 0..MI_MAX_NUMA-1
+  mi_cache_slot_t* slot;
+  for (int i = 0; i < MI_CACHE_MAX; i++) {
+    slot = &cache[numa_node][i];
+    void* p = mi_atomic_read_ptr_relaxed(&slot->p);
+    if (p == NULL) { // free slot
+      if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) {
+        // claimed!
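+        // the sentinel keeps concurrent pops and pushes away from this slot, so the
+        // plain field writes below cannot race; the final pointer write publishes them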
+        slot->memid = memid;
+        slot->is_committed = is_committed;
+        slot->is_large = is_large;
+        mi_atomic_write_ptr(&slot->p, start); // and make it available
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
 /* -----------------------------------------------------------
   Arena Allocation
 ----------------------------------------------------------- */
@@ -148,6 +240,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
   bool default_large = false;
   if (large==NULL) large = &default_large;  // ensure `large != NULL`
 
+  const int numa_node = _mi_os_numa_node(tld); // current numa node
+
   // try to allocate in an arena if the alignment is small enough
   // and the object is not too large or too small.
   if (alignment <= MI_SEGMENT_ALIGN &&
@@ -155,8 +249,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
       size >= MI_ARENA_MIN_OBJ_SIZE) {
     const size_t bcount = mi_block_count_of_size(size);
-    const int numa_node = _mi_os_numa_node(tld); // current numa node
-
+    mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
 
     // try numa affine allocation
     for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
@@ -184,6 +277,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
     }
   }
 
+  // try to get from the cache
+  void* p = mi_cache_pop(numa_node, size, alignment, commit, large, is_zero, memid, tld);
+  if (p != NULL) return p;
+
+  // finally, fall back to the OS
   *is_zero = true;
   *memid = MI_MEMID_OS;
@@ -202,13 +300,16 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, siz
   Arena free
 ----------------------------------------------------------- */
 
-void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
+void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats) {
   mi_assert_internal(size > 0 && stats != NULL);
   if (p==NULL) return;
   if (size==0) return;
+
   if (memid == MI_MEMID_OS) {
     // was a direct OS allocation, pass through
-    _mi_os_free(p, size, stats);
+    if (!mi_cache_push(p, size, memid, is_committed, is_large)) {
+      _mi_os_free(p, size, stats);
+    }
   }
   else {
     // allocated in an arena
diff --git a/src/segment.c b/src/segment.c
index 99e382bc..54a0c8fe 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -284,7 +284,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
     _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set
   }
   // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);
-  _mi_arena_free(segment, mi_segment_size(segment), segment->memid, tld->stats);
+  _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_committed || (~segment->commit_mask == 0), segment->mem_is_fixed, tld->stats);
 }
diff --git a/test/test-stress.c b/test/test-stress.c
index 50cbf9bd..3aa65f41 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------------
+ /* ----------------------------------------------------------------------------
 Copyright (c) 2018,2019 Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license.
@@ -64,9 +64,9 @@ static bool chance(size_t perc, random_t r) {
 
 static void* alloc_items(size_t items, random_t r) {
   if (chance(1, r)) {
-    if (chance(1,r)) items *= 1000;       // 0.01% giant
-    else if (chance(10,r)) items *= 100;  // 0.1% huge
-    else items *= 10;                     // 1% large objects;
+    if (chance(1, r)) items *= 1000;       // 0.01% giant
+    else if (chance(10, r)) items *= 100;  // 0.1% huge
+    else items *= 10;                      // 1% large objects;
   }
   if (items==40) items++; // pthreads uses that size for stack increases
   uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t));