From 72a39c0bb1220ca7046ff9084da312cb701d2616 Mon Sep 17 00:00:00 2001
From: daan
Date: Thu, 11 Jul 2019 13:30:40 -0700
Subject: [PATCH 1/3] initial fix for delayed freeing of huge pages
 transferred between threads

---
 src/heap.c       |  2 +-
 src/page-queue.c |  2 +-
 src/page.c       | 14 +++++++++++---
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/heap.c b/src/heap.c
index 93968713..b7f93b8a 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -123,7 +123,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   if (collect == ABANDON) {
     for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) {
       _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
-    }
+    }
   }
 
   // free thread delayed blocks.
diff --git a/src/page-queue.c b/src/page-queue.c
index 8817d992..ebe858b3 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -293,7 +293,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
   mi_assert_expensive(mi_page_queue_contains(from, page));
   mi_assert_expensive(!mi_page_queue_contains(to, page));
   mi_assert_internal(page->block_size == to->block_size ||
-                     (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(to)) ||
+                     (page->block_size > MI_LARGE_SIZE_MAX && (mi_page_queue_is_huge(to) || mi_page_queue_is_full(to))) ||
                      (page->block_size == from->block_size && mi_page_queue_is_full(to)));
 
   if (page->prev != NULL) page->prev->next = page->next;
diff --git a/src/page.c b/src/page.c
index 04464fc5..1d5f77ae 100644
--- a/src/page.c
+++ b/src/page.c
@@ -542,6 +542,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert(mi_page_immediate_available(page));
 }
 
+
 /* -----------------------------------------------------------
   Find pages with free blocks
 -------------------------------------------------------------*/
@@ -618,7 +619,6 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
 
 // Find a page with free blocks of `size`.
 static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
-  _mi_heap_delayed_free(heap);
   mi_page_queue_t* pq = mi_page_queue(heap,size);
   mi_page_t* page = pq->first;
   if (page != NULL) {
@@ -674,7 +674,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
     mi_assert_internal(mi_page_immediate_available(page));
     mi_assert_internal(page->block_size == block_size);
     mi_heap_stat_increase( heap, huge, block_size);
-  }
+  }
   return page;
 }
 
@@ -694,6 +694,9 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
   // call potential deferred free routines
   _mi_deferred_free(heap, false);
 
+  // free delayed frees from other threads
+  _mi_heap_delayed_free(heap);
+
   // huge allocation?
   mi_page_t* page;
   if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {
@@ -714,5 +717,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
   mi_assert_internal(page->block_size >= size);
 
   // and try again, this time succeeding! (i.e. this should never recurse)
-  return _mi_page_malloc(heap, page, size);
+  void* p = _mi_page_malloc(heap, page, size);
+  if (page->used == page->reserved) {
+    // needed for huge pages to free reliably from other threads
+    mi_page_to_full(page,mi_page_queue_of(page));
+  }
+  return p;
 }

From 1fdb4b288fcbc754487374028c58b0035fab9fce Mon Sep 17 00:00:00 2001
From: daan
Date: Thu, 11 Jul 2019 15:21:57 -0700
Subject: [PATCH 2/3] more eager handling of non-local frees

---
 include/mimalloc-internal.h |  2 +-
 include/mimalloc-types.h    |  7 ++++---
 src/alloc.c                 |  7 +++++--
 src/heap.c                  | 19 ++++++++++++++-----
 src/page.c                  | 29 ++++++++++++++++++++---------
 5 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 64a169f8..9fa5993b 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -66,7 +66,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); //
 void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
 
 void _mi_heap_delayed_free(mi_heap_t* heap);
-void _mi_page_use_delayed_free(mi_page_t* page, bool enable);
+void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay);
 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
 void _mi_deferred_free(mi_heap_t* heap, bool force);
 
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 145ea9f0..067616fe 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -114,8 +114,9 @@ typedef struct mi_block_s {
 
 typedef enum mi_delayed_e {
   MI_NO_DELAYED_FREE = 0,
-  MI_USE_DELAYED_FREE,
-  MI_DELAYED_FREEING
+  MI_USE_DELAYED_FREE = 1,
+  MI_DELAYED_FREEING = 2,
+  MI_NEVER_DELAYED_FREE = 3
 } mi_delayed_t;
 
 
@@ -132,7 +133,7 @@ typedef union mi_page_flags_u {
 typedef union mi_thread_free_u {
   volatile uintptr_t value;
   struct {
-    mi_delayed_t delayed:2;
+    uintptr_t delayed:2;
 #if MI_INTPTR_SIZE==8
     uintptr_t head:62;   // head free block in the list (right-shifted by 2)
 #elif MI_INTPTR_SIZE==4
diff --git a/src/alloc.c b/src/alloc.c
index 8ae29723..6a2a263f 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -112,7 +112,9 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
 
   do {
     tfreex.value = tfree.value = page->thread_free.value;
-    use_delayed = (tfree.delayed == MI_USE_DELAYED_FREE);
+    use_delayed = (tfree.delayed == MI_USE_DELAYED_FREE ||
+                   (tfree.delayed == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1)
+                   );
     if (mi_unlikely(use_delayed)) {
       // unlikely: this only happens on the first concurrent free in a page that is in the full list
       tfreex.delayed = MI_DELAYED_FREEING;
@@ -144,7 +146,8 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
     // and reset the MI_DELAYED_FREEING flag
     do {
       tfreex.value = tfree.value = page->thread_free.value;
-      tfreex.delayed = MI_NO_DELAYED_FREE;
+      mi_assert_internal(tfree.delayed == MI_NEVER_DELAYED_FREE || tfree.delayed == MI_DELAYED_FREEING);
+      if (tfree.delayed != MI_NEVER_DELAYED_FREE) tfreex.delayed = MI_NO_DELAYED_FREE;
     } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
   }
 }
diff --git a/src/heap.c b/src/heap.c
index b7f93b8a..dc21bd0a 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -97,6 +97,14 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
   return true; // don't break
 }
 
+static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
+  UNUSED(arg1);
+  UNUSED(arg2);
+  UNUSED(heap);
+  UNUSED(pq);
+  _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE);
+  return true; // don't break
+}
 
 static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
 {
@@ -119,11 +127,12 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
     #endif
   }
 
-  // if abandoning, mark all full pages to no longer add to delayed_free
+  // if abandoning, mark all pages to no longer add to delayed_free
   if (collect == ABANDON) {
-    for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) {
-      _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
-    }
+    //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) {
+    //  _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
+    //}
+    mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
   }
 
   // free thread delayed blocks.
@@ -228,7 +237,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   UNUSED(pq);
 
   // ensure no more thread_delayed_free will be added
-  _mi_page_use_delayed_free(page, false);
+  _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE);
 
   // stats
   if (page->block_size > MI_LARGE_SIZE_MAX) {
diff --git a/src/page.c b/src/page.c
index 1d5f77ae..e888885b 100644
--- a/src/page.c
+++ b/src/page.c
@@ -109,17 +109,19 @@ bool _mi_page_is_valid(mi_page_t* page) {
 #endif
 
 
-void _mi_page_use_delayed_free(mi_page_t* page, bool enable) {
+void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) {
   mi_thread_free_t tfree;
   mi_thread_free_t tfreex;
 
   do {
     tfreex.value = tfree.value = page->thread_free.value;
-    tfreex.delayed = (enable ? MI_USE_DELAYED_FREE : MI_NO_DELAYED_FREE);
-    if (mi_unlikely(tfree.delayed == MI_DELAYED_FREEING)) {
+    if (mi_unlikely(tfree.delayed < MI_DELAYED_FREEING)) {
+      tfreex.delayed = delay;
+    }
+    else if (mi_unlikely(tfree.delayed == MI_DELAYED_FREEING)) {
      mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
      continue;          // and try again
-    }
+    }
   }
   while(tfreex.delayed != tfree.delayed &&    // avoid atomic operation if already equal
        !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
@@ -272,7 +274,7 @@ void _mi_page_unfull(mi_page_t* page) {
   mi_assert_expensive(_mi_page_is_valid(page));
   mi_assert_internal(page->flags.in_full);
 
-  _mi_page_use_delayed_free(page, false);
+  _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE);
   if (!page->flags.in_full) return;
 
   mi_heap_t* heap = page->heap;
@@ -288,7 +290,7 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
   mi_assert_internal(!mi_page_immediate_available(page));
   mi_assert_internal(!page->flags.in_full);
 
-  _mi_page_use_delayed_free(page, true);
+  _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE);
   if (page->flags.in_full) return;
 
   mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page);
@@ -305,8 +307,8 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
   mi_assert_expensive(_mi_page_is_valid(page));
   mi_assert_internal(pq == mi_page_queue_of(page));
   mi_assert_internal(page->heap != NULL);
-  mi_assert_internal(page->thread_free.delayed == MI_NO_DELAYED_FREE);
 
+  _mi_page_use_delayed_free(page,MI_NEVER_DELAYED_FREE);
 #if MI_DEBUG>1
   // check there are no references left..
   for (mi_block_t* block = (mi_block_t*)page->heap->thread_delayed_free; block != NULL; block = mi_block_nextx(page->heap->cookie,block)) {
@@ -330,7 +332,14 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
   mi_assert_expensive(_mi_page_is_valid(page));
   mi_assert_internal(pq == mi_page_queue_of(page));
   mi_assert_internal(mi_page_all_free(page));
-  mi_assert_internal(page->thread_free.delayed != MI_DELAYED_FREEING);
+  #if MI_DEBUG>1
+  // check if we can safely free
+  mi_thread_free_t free;
+  free.value = page->thread_free.value;
+  free.delayed = MI_NEVER_DELAYED_FREE;
+  free.value = mi_atomic_exchange(&page->thread_free.value, free.value);
+  mi_assert_internal(free.delayed != MI_DELAYED_FREEING);
+  #endif
 
   page->flags.has_aligned = false;
@@ -717,10 +726,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
   mi_assert_internal(page->block_size >= size);
 
   // and try again, this time succeeding! (i.e. this should never recurse)
-  void* p = _mi_page_malloc(heap, page, size);
+  return _mi_page_malloc(heap, page, size);
+  /*
   if (page->used == page->reserved) {
     // needed for huge pages to free reliably from other threads.
     mi_page_to_full(page,mi_page_queue_of(page));
   }
   return p;
+  */
 }

From a932e43650f551aa477c4dae0df691acd153cd39 Mon Sep 17 00:00:00 2001
From: daan
Date: Thu, 11 Jul 2019 15:44:37 -0700
Subject: [PATCH 3/3] experiment with larger small pages

---
 include/mimalloc-types.h |  4 ++--
 src/options.c            |  2 +-
 src/segment.c            | 16 ++++++++++++----
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 04148f3e..106eaaef 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -75,8 +75,8 @@ terms of the MIT license. A copy of the license can be found in the file
 // Main tuning parameters for segment and page sizes
 // Sizes for 64-bit, divide by two for 32-bit
-#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb
-#define MI_LARGE_PAGE_SHIFT ( 6 + MI_SMALL_PAGE_SHIFT) // 4mb
+#define MI_SMALL_PAGE_SHIFT (14 + MI_INTPTR_SHIFT) // 128kb
+#define MI_LARGE_PAGE_SHIFT ( 5 + MI_SMALL_PAGE_SHIFT) // 4mb
 #define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb
 
 // Derived constants
diff --git a/src/options.c b/src/options.c
index 9362cb64..b27f714f 100644
--- a/src/options.c
+++ b/src/options.c
@@ -35,7 +35,7 @@ static mi_option_desc_t options[_mi_option_last] =
 {
   { 0, UNINIT, "page_reset" },
   { 0, UNINIT, "cache_reset" },
   { 0, UNINIT, "pool_commit" },
-  { 1, UNINIT, "eager_commit" }, // secure must have eager commit
+  { 0, UNINIT, "eager_commit" }, // secure must have eager commit
   { 0, UNINIT, "large_os_pages" }, // use large OS pages
   { 0, UNINIT, "reset_decommits" },
   { 0, UNINIT, "reset_discards" },
diff --git a/src/segment.c b/src/segment.c
index eae33bba..edae881a 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -736,13 +736,21 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
 mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   mi_page_t* page;
-  if (block_size <= (MI_SMALL_PAGE_SIZE / 8))
+  if (block_size <= (MI_SMALL_PAGE_SIZE / 8)) {
+    // smaller blocks than 16kb (assuming MI_SMALL_PAGE_SIZE == 128kb)
     page = mi_segment_small_page_alloc(tld,os_tld);
-  else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t)))
-    page = mi_segment_large_page_alloc(tld, os_tld);
-  else
+  }
+  else if (block_size <= (MI_SMALL_PAGE_SIZE/2) && (MI_SMALL_PAGE_SIZE % block_size) <= (MI_SMALL_PAGE_SIZE / 8)) {
+    // use small page too if it happens to fit well
+    page = mi_segment_small_page_alloc(tld, os_tld);
+  }
+  else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {
+    // otherwise use a large page
+    page = mi_segment_large_page_alloc(tld, os_tld);
+  }
+  else {
     page = mi_segment_huge_page_alloc(block_size,tld,os_tld);
+  }
   mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page)));
   return page;
 }
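
Notes on the series follow; none of this text is part of the patches themselves.

Patch 1 moves the draining call _mi_heap_delayed_free out of mi_find_free_page and
into _mi_malloc_generic, so the owning thread now processes cross-thread frees on
every slow-path allocation rather than only when searching a page queue. The shape
of such a drain is: atomically detach the whole delayed list in one exchange, then
walk it and free each block locally. The sketch below models that shape with
simplified stand-in types; block_t, heap_t, and free_local are hypothetical and do
not match mimalloc's actual definitions:

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct block_s { struct block_s* next; } block_t;

    typedef struct heap_s {
      _Atomic(block_t*) thread_delayed_free;  // blocks freed by other threads
    } heap_t;

    // placeholder: a real heap would return b to its page's local free list
    static void free_local(heap_t* heap, block_t* b) { (void)heap; (void)b; }

    static void heap_delayed_free(heap_t* heap) {
      // take ownership of the entire delayed list with a single exchange
      block_t* b = atomic_exchange_explicit(&heap->thread_delayed_free, NULL,
                                            memory_order_acquire);
      while (b != NULL) {
        block_t* next = b->next;   // read before the block is recycled
        free_local(heap, b);
        b = next;
      }
    }

Because other threads only push onto thread_delayed_free, the single exchange is
enough: the owner never races with a concurrent pop.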
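On the mechanism patch 2 refines: thread_free packs a free-list head pointer and a
2-bit mi_delayed_t tag into one word, so a non-local free and the delayed-free state
change together under a single compare-and-swap. Giving the states explicit values,
with MI_NEVER_DELAYED_FREE = 3, is what lets _mi_page_use_delayed_free treat
"tfree.delayed < MI_DELAYED_FREEING" as "safe to overwrite" and makes the
never-delayed state sticky. The standalone C11 sketch below models that encoding;
the names echo mimalloc's but this is illustrative, not the library's code
(mimalloc keeps the tag in a bitfield of a union and CASes the whole value, rather
than masking a raw word as done here):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stddef.h>

    typedef enum { T_NO = 0, T_USE = 1, T_FREEING = 2, T_NEVER = 3 } tag_t;
    typedef struct node_s { struct node_s* next; } node_t;

    static _Atomic uintptr_t tfree;  // 4-byte-aligned head pointer | 2-bit tag

    static node_t* head_of(uintptr_t v) { return (node_t*)(v & ~(uintptr_t)3); }
    static tag_t   tag_of(uintptr_t v)  { return (tag_t)(v & 3); }

    // a non-owner thread pushes a freed block, preserving the current tag
    static void mt_push(node_t* n) {
      uintptr_t old = atomic_load_explicit(&tfree, memory_order_relaxed);
      uintptr_t desired;
      do {
        n->next = head_of(old);                           // link to current head
        desired = (uintptr_t)n | (uintptr_t)tag_of(old);  // keep the tag bits
      } while (!atomic_compare_exchange_weak_explicit(&tfree, &old, desired,
                 memory_order_release, memory_order_relaxed));
    }

    // the owner changes the tag without disturbing the list; T_NEVER is sticky,
    // and T_FREEING makes us wait, like MI_DELAYED_FREEING in the patch
    static void set_tag(tag_t t) {
      uintptr_t old = atomic_load_explicit(&tfree, memory_order_relaxed);
      for (;;) {
        if (tag_of(old) == T_NEVER) return;               // never overwritten
        if (tag_of(old) == T_FREEING) {                   // spin until resolved
          old = atomic_load_explicit(&tfree, memory_order_relaxed);
          continue;
        }
        uintptr_t desired = (uintptr_t)head_of(old) | (uintptr_t)t;
        if (atomic_compare_exchange_weak_explicit(&tfree, &old, desired,
              memory_order_acq_rel, memory_order_relaxed)) return;
      }
    }

This also shows why the patch changed the bitfield type from mi_delayed_t to
uintptr_t: the tag now has to round-trip through plain integer compares and CAS
loops, including the new value 3.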
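For patch 3, the arithmetic behind the new constants: MI_SMALL_PAGE_SHIFT becomes
14 + MI_INTPTR_SHIFT, i.e. 2^17 = 128kb on 64-bit, while dropping
MI_LARGE_PAGE_SHIFT from 6 to 5 above it keeps segments at 2^22 = 4mb. The extra
case in _mi_segment_page_alloc then reuses a small page whenever the block is at
most half a page and the tail waste (page size modulo block size) is at most one
eighth of the page. A small self-contained check of those bounds, assuming a
64-bit build:

    #include <stdio.h>

    #define MI_INTPTR_SHIFT      3   // 64-bit build
    #define MI_SMALL_PAGE_SHIFT  (14 + MI_INTPTR_SHIFT)
    #define MI_SMALL_PAGE_SIZE   (1UL << MI_SMALL_PAGE_SHIFT)

    // patch 3's extra case: a small page fits if the block is at most half a
    // page and the leftover tail stays under 1/8th of the page
    static int fits_small(unsigned long block_size) {
      return block_size <= MI_SMALL_PAGE_SIZE/2
          && MI_SMALL_PAGE_SIZE % block_size <= MI_SMALL_PAGE_SIZE/8;
    }

    int main(void) {
      printf("small page: %lu KiB\n", MI_SMALL_PAGE_SIZE/1024);  // 128 KiB
      // 128 % 48 = 32 KiB waste > 16 KiB budget -> falls through to a large page
      printf("48 KiB block -> %s\n", fits_small(48*1024) ? "small" : "large");
      // 64 KiB divides 128 KiB exactly -> stays on a small page
      printf("64 KiB block -> %s\n", fits_small(64*1024) ? "small" : "large");
      return 0;
    }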