From 8bfd5ec865a2f6f1a7d237092daa43c93aec5e2c Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 1 May 2020 23:00:17 -0700
Subject: [PATCH] improve arena cache to avoid full scans

---
 src/arena.c   | 54 +++++++++++++++++++++++++++++++++++++++++++-------
 src/options.c |  2 +-
 src/segment.c |  4 ++--
 3 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index 77616580..3f90a07d 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -127,8 +127,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
 /* -----------------------------------------------------------
   Arena cache
 ----------------------------------------------------------- */
-#define MI_CACHE_MAX (128)
-#define MI_MAX_NUMA  (16)
+#define MI_CACHE_MAX (256)
+#define MI_MAX_NUMA  (8)
 
 #define MI_SLOT_IN_USE ((void*)1)
 
@@ -140,7 +140,20 @@ typedef struct mi_cache_slot_s {
   volatile bool is_large;
 } mi_cache_slot_t;
 
-static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX];
+static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX];      // = 0
+static volatile _Atomic(uintptr_t) cache_count[MI_MAX_NUMA];  // = 0
+
+typedef union mi_cache_count_u {
+  uintptr_t value;
+  struct {
+    int16_t count;    // at most `count` elements in the cache
+#if MI_INTPTR_SIZE > 4
+    uint32_t epoch;   // each push/pop increases this
+#else
+    uint16_t epoch;
+#endif
+  } x;
+} mi_cache_count_t;
 
 static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) {
   // only segment blocks
@@ -161,10 +174,23 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co
   // find a free slot
   mi_cache_slot_t* slot;
   for (int n = numa_min; n <= numa_max; n++) {
-    for (int i = 0; i < MI_CACHE_MAX; i++) {
+    mi_cache_count_t top = { 0 };
+    top.value = mi_atomic_read_relaxed(&cache_count[n]);
+    int16_t count = top.x.count;
+    for (int16_t i = count - 1; i >= 0; i--) {
       slot = &cache[n][i];
       void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p);
-      if (p > MI_SLOT_IN_USE) {  // not NULL or 1
+      if (p == NULL) {
+        if (count > 0) { count = i; }
+      }
+      else if (p > MI_SLOT_IN_USE) {  // not NULL or 1
+        if (count >= 0 && count < top.x.count) {  // new lower bound?
+          mi_cache_count_t newtop = { 0 };
+          newtop.x.count = count;
+          newtop.x.epoch = top.x.epoch + 1;
+          mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value);  // it's fine if this fails; it just causes a longer scan
+        }
+        count = -1;  // don't try to set the lower bound again
         if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) {
           // claimed
           if (!*large && slot->is_large) {
@@ -204,7 +230,9 @@ static void mi_cache_purge(mi_os_tld_t* tld) {
   if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA;
   mi_cache_slot_t* slot;
   int purged = 0;
-  for (int i = 0; i < MI_CACHE_MAX; i++) {
+  mi_cache_count_t top = { 0 };
+  top.value = mi_atomic_read_relaxed(&cache_count[numa_node]);
+  for (int i = 0; i < top.x.count; i++) {
     slot = &cache[numa_node][i];
     void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p);
     if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) {
@@ -240,12 +268,24 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit
   int numa_node = _mi_os_numa_node(NULL);
   if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA;
   mi_cache_slot_t* slot;
-  for (int i = 0; i < MI_CACHE_MAX; i++) {
+  mi_cache_count_t top = { 0 };
+  top.value = mi_atomic_read_relaxed(&cache_count[numa_node]);
+  for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) {
     slot = &cache[numa_node][i];
     void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p);
     if (p == NULL) { // free slot
       if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) {
         // claimed!
+        // first try to raise the top bound
+        mi_cache_count_t newtop = { 0 };
+        newtop.x.count = i+1;
+        newtop.x.epoch = top.x.epoch + 1;
+        while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) {
+          top.value = mi_atomic_read_relaxed(&cache_count[numa_node]);
+          if (top.x.count > newtop.x.count) break;  // another push already raised it
+          newtop.x.epoch = top.x.epoch + 1;         // otherwise try again
+        }
+        // set the slot
         slot->expire = 0;
         slot->is_committed = is_committed;
         slot->memid = memid;
diff --git a/src/options.c b/src/options.c
index 89048c7d..767a7c35 100644
--- a/src/options.c
+++ b/src/options.c
@@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(verbose) },
 
   // the following options are experimental and not all combinations make sense.
-  { 0, UNINIT, MI_OPTION(eager_commit) },       // commit on demand?
+  { 1, UNINIT, MI_OPTION(eager_commit) },       // commit on demand?
 #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4)    // and other OS's without overcommit?
   { 0, UNINIT, MI_OPTION(eager_region_commit) },
   { 0, UNINIT, MI_OPTION(reset_decommits) },    // reset decommits memory
diff --git a/src/segment.c b/src/segment.c
index cd239931..5cf1598d 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -960,6 +960,7 @@ static void mi_abandoned_push(mi_segment_t* segment) {
 }
 
 // Wait until there are no more pending reads on segments that used to be in the abandoned list
+// called for example from `arena.c` before decommitting
 void _mi_abandoned_await_readers(void) {
   uintptr_t n;
   do {
@@ -982,8 +983,7 @@ static mi_segment_t* mi_abandoned_pop(void) {
 
   // Do a pop. We use a reader count to prevent
   // a segment to be decommitted while a read is still pending,
   // and a tagged pointer to prevent A-B-A link corruption.
-  // (this is called from `memory.c:_mi_mem_free` for example)
   mi_atomic_increment(&abandoned_readers);  // ensure no segment gets decommitted
   mi_tagged_segment_t next = 0;
   do {
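
Note on the `cache_count` scheme above: each NUMA node's bound is a single atomic
word that packs a signed 16-bit `count` (at most `count` slots are in use) together
with an `epoch` that is bumped on every update, so a compare-and-swap on the packed
word fails whenever another thread changed the bound in between, even if the count
itself happens to be back at the same value (A-B-A). Pushes loop until their bound
(or a higher one) is published; pops publish a lower bound with a single attempt,
since a stale bound only costs a longer scan. The following is a minimal standalone
sketch of that idea, not the patch's code: it uses C11 atomics instead of mimalloc's
`mi_atomic_*` wrappers, assumes a 64-bit `uintptr_t` (the `MI_INTPTR_SIZE > 4`
branch), and the names `cache_top`, `top_raise`, and `top_lower` are illustrative.

  #include <stdatomic.h>
  #include <stdint.h>

  // Packed bound: a count and an epoch, updated together in one CAS.
  typedef union cache_top_u {
    uintptr_t value;
    struct {
      int16_t  count;   // at most `count` slots are in use
      uint32_t epoch;   // bumped on every update; defeats A-B-A
    } x;
  } cache_top_t;

  static _Atomic(uintptr_t) cache_top;  // = 0

  // Push path: after claiming slot `new_count - 1`, raise the bound to
  // `new_count`, retrying until ours (or a higher bound) is published.
  static void top_raise(int16_t new_count) {
    cache_top_t top = { 0 }, newtop = { 0 };
    top.value = atomic_load_explicit(&cache_top, memory_order_relaxed);
    newtop.x.count = new_count;
    newtop.x.epoch = top.x.epoch + 1;
    // on failure, the C11 CAS writes the observed value back into `top.value`
    while (!atomic_compare_exchange_strong(&cache_top, &top.value, newtop.value)) {
      if (top.x.count >= new_count) break;  // another push already raised it
      newtop.x.epoch = top.x.epoch + 1;     // otherwise retry with a fresh epoch
    }
  }

  // Pop path: a downward scan that saw only empty slots at `new_count` and
  // above may lower the bound; one attempt only, failure is harmless.
  static void top_lower(int16_t new_count, uintptr_t observed) {
    cache_top_t top = { 0 }, newtop = { 0 };
    top.value = observed;  // the packed word read at the start of the scan
    newtop.x.count = new_count;
    newtop.x.epoch = top.x.epoch + 1;
    (void)atomic_compare_exchange_strong(&cache_top, &top.value, newtop.value);
  }

One difference from the sketch: mimalloc's `mi_atomic_cas_strong(p, desired,
expected)` returns a bool without writing the observed value back, which is why the
push loop in the patch re-reads `cache_count[numa_node]` explicitly after a failed
CAS rather than relying on the C11 write-back.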