// Patch summary: represent the page thread-free list head as a plain tagged word.
//
// - include/mimalloc-types.h: `mi_thread_free_t` changes from a union with
//   `delayed`/`head` bit-fields to a plain `uintptr_t`; the bottom 2 bits hold
//   the `mi_delayed_t` flags. `MI_TF_PTR_SHIFT` is removed.
// - include/mimalloc-internal.h: adds the accessors `mi_tf_block`,
//   `mi_tf_delayed`, `mi_tf_make`, `mi_tf_set_delayed` and `mi_tf_set_block`.
//   They mask/or the low 2 bits (`& ~0x03`, `& 0x03`, `|`) where the old code
//   shifted the stored head by `MI_TF_PTR_SHIFT`. The prototype of
//   `_mi_free_delayed_block` gains a leading `mi_page_t* page` parameter.
// - src/alloc.c and src/page.c: every `.value` / `.delayed` / `.head` access on
//   `page->thread_free` is rewritten in terms of the accessors above, and the
//   compare-exchange calls now pass the `mi_thread_free_t` values directly.
// - src/alloc.c: `_mi_free_delayed_block` no longer derives the segment and page
//   from the block; it just forwards to `_mi_free_block(page,true,block)`.
// - src/page.c, `_mi_heap_delayed_free`: the segment/page lookup and its
//   assertions move here. If the page's delayed flag is `MI_DELAYED_FREEING`
//   the block is pushed back onto `heap->thread_delayed_free` with a
//   compare-exchange loop instead of being freed; otherwise
//   `_mi_free_delayed_block(page,block)` is called.
// - src/init.c: the `thread_free` slot of the `_mi_page_empty` initializer
//   changes from `{0}` to `0` to match the new scalar type.
//
// NOTE(review): in `_mi_page_use_delayed_free`, the `continue` sits inside a
//   `do { ... } while (...)`, so it jumps to the loop condition. On that path
//   `tfreex` still equals `tfree`, which makes
//   `mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)` false and ends the loop
//   after a single `mi_atomic_yield()` instead of retrying as the
//   "and try again" comment says. The removed lines have the same shape, so
//   this is not introduced by the patch -- confirm whether it is intended.
// NOTE(review): `_mi_free_block_mt` drops the `= {0}` initializers of `tfree`
//   and `tfreex`; both are assigned inside the first loop (on both branches for
//   `tfreex`) before they are read.
// NOTE(review): in `mi_page_thread_free_collect` the first assignment to
//   `tfreex` (`tfreex = tfree = page->thread_free`) is immediately overwritten
//   by `tfreex = mi_tf_set_block(tfree,NULL)`.
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e996dfd0..a7f2f66d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -90,7 +90,7 @@ void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_at void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero); void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero); mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); -void _mi_free_delayed_block(mi_block_t* block); +void _mi_free_delayed_block(mi_page_t* page, mi_block_t* block); #if MI_DEBUG>1 bool _mi_page_is_valid(mi_page_t* page); @@ -229,6 +229,23 @@ static inline mi_page_t* _mi_ptr_page(void* p) { return _mi_segment_page_of(_mi_ptr_segment(p), p); } +// Thread free access +static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { + return (mi_block_t*)(tf & ~0x03); +} +static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) { + return (mi_delayed_t)(tf & 0x03); +} +static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) { + return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed); +} +static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) { + return mi_tf_make(mi_tf_block(tf),delayed); +} +static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) { + return mi_tf_make(block, mi_tf_delayed(tf)); +} + // are all blocks in a page freed? static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); @@ -243,7 +260,7 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) { // are there free blocks in this page? 
static inline bool mi_page_has_free(mi_page_t* page) { mi_assert_internal(page != NULL); - bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (page->thread_free.head != 0)); + bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL)); mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity); return hasfree; } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 966de36a..2f16020f 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -134,20 +134,9 @@ typedef union mi_page_flags_u { } mi_page_flags_t; // Thread free list. -// We use 2 bits of the pointer for the `use_delayed_free` and `delayed_freeing` flags. -typedef union mi_thread_free_u { - volatile uintptr_t value; - struct { - uintptr_t delayed:2; -#if MI_INTPTR_SIZE==8 - uintptr_t head:62; // head free block in the list (right-shifted by 2) -#elif MI_INTPTR_SIZE==4 - uintptr_t head:30; -#endif - }; -} mi_thread_free_t; +// We use bottom 2 bits of the pointer for mi_delayed_t flags +typedef uintptr_t mi_thread_free_t; -#define MI_TF_PTR_SHIFT (2) // A page contains blocks of one specific size (`block_size`). 
// Each page has three list of free blocks: diff --git a/src/alloc.c b/src/alloc.c index 6a2a263f..6a5395c7 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -106,25 +106,25 @@ void* mi_zalloc(size_t size) mi_attr_noexcept { // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { - mi_thread_free_t tfree = {0}; - mi_thread_free_t tfreex = {0}; + mi_thread_free_t tfree; + mi_thread_free_t tfreex; bool use_delayed; do { - tfreex.value = tfree.value = page->thread_free.value; - use_delayed = (tfree.delayed == MI_USE_DELAYED_FREE || - (tfree.delayed == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1) + tfree = page->thread_free; + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE || + (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1) ); if (mi_unlikely(use_delayed)) { // unlikely: this only happens on the first concurrent free in a page that is in the full list - tfreex.delayed = MI_DELAYED_FREEING; + tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); } else { // usual: directly add to page thread_free list - mi_block_set_next(page, block, (mi_block_t*)((uintptr_t)tfree.head << MI_TF_PTR_SHIFT)); - tfreex.head = (uintptr_t)block >> MI_TF_PTR_SHIFT; + mi_block_set_next(page, block, mi_tf_block(tfree)); + tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value)); + } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); if (mi_likely(!use_delayed)) { // increment the thread free count and return @@ -145,10 +145,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // and reset the MI_DELAYED_FREEING flag do { - tfreex.value = tfree.value = page->thread_free.value; - mi_assert_internal(tfree.delayed == MI_NEVER_DELAYED_FREE || tfree.delayed == MI_DELAYED_FREEING); - if (tfree.delayed != MI_NEVER_DELAYED_FREE) 
tfreex.delayed = MI_NO_DELAYED_FREE; - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value)); + tfreex = tfree = page->thread_free; + mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); } } @@ -249,12 +249,7 @@ void mi_free(void* p) mi_attr_noexcept } } -void _mi_free_delayed_block(mi_block_t* block) { - mi_assert_internal(block != NULL); - const mi_segment_t* segment = _mi_ptr_segment(block); - mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(_mi_thread_id() == segment->thread_id); - mi_page_t* page = _mi_segment_page_of(segment,block); +void _mi_free_delayed_block(mi_page_t* page, mi_block_t* block) { _mi_free_block(page,true,block); } diff --git a/src/init.c b/src/init.c index f6f36c37..5b2d3c8e 100644 --- a/src/init.c +++ b/src/init.c @@ -14,7 +14,7 @@ const mi_page_t _mi_page_empty = { 0, false, false, {0}, 0, 0, NULL, 0, 0, // free, used, cookie - NULL, 0, {0}, + NULL, 0, 0, 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==4) , { NULL } diff --git a/src/page.c b/src/page.c index 9f012256..9bc1ab87 100644 --- a/src/page.c +++ b/src/page.c @@ -82,7 +82,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(mi_page_list_is_valid(page,page->free)); mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); - mi_block_t* tfree = (mi_block_t*)((uintptr_t)page->thread_free.head << MI_TF_PTR_SHIFT); + mi_block_t* tfree = mi_tf_block(page->thread_free); mi_assert_internal(mi_page_list_is_valid(page, tfree)); size_t tfree_count = mi_page_list_count(page, tfree); mi_assert_internal(tfree_count <= page->thread_freed + 1); @@ -114,17 +114,17 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t 
delay ) { mi_thread_free_t tfreex; do { - tfreex.value = tfree.value = page->thread_free.value; - if (mi_unlikely(tfree.delayed < MI_DELAYED_FREEING)) { - tfreex.delayed = delay; + tfreex = tfree = page->thread_free; + if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) { + tfreex = mi_tf_set_delayed(tfree,delay); } - else if (mi_unlikely(tfree.delayed == MI_DELAYED_FREEING)) { + else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. continue; // and try again } } - while(tfreex.delayed != tfree.delayed && // avoid atomic operation if already equal - !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value)); + while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal + !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); } @@ -139,13 +139,13 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { static void mi_page_thread_free_collect(mi_page_t* page) { mi_block_t* head; - mi_thread_free_t tfree = {0}; - mi_thread_free_t tfreex = {0}; + mi_thread_free_t tfree = 0; + mi_thread_free_t tfreex = 0; do { - tfreex.value = tfree.value = page->thread_free.value; - head = (mi_block_t*)((uintptr_t)tfree.head << MI_TF_PTR_SHIFT); - tfreex.head = 0; - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value)); + tfreex = tfree = page->thread_free; + head = mi_tf_block(tfree); + tfreex = mi_tf_set_block(tfree,NULL); + } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); // return if the list is empty if (head == NULL) return; @@ -189,7 +189,7 @@ void _mi_page_free_collect(mi_page_t* page) { page->local_free = NULL; } // and the thread free list - if (page->thread_free.head != 0) { // quick test to avoid an atomic operation + if (mi_tf_block(page->thread_free) != 
NULL) { // quick test to avoid an atomic operation mi_page_thread_free_collect(page); } } @@ -258,8 +258,25 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { mi_block_t* next = mi_block_nextx(heap->cookie,block); - // use internal free instead of regular one to keep stats etc correct - _mi_free_delayed_block(block); + // get segment and page + const mi_segment_t* segment = _mi_ptr_segment(block); + mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(_mi_thread_id() == segment->thread_id); + mi_page_t* page = _mi_segment_page_of(segment, block); + if (mi_tf_delayed(page->thread_free) != MI_DELAYED_FREEING) { + // use internal free instead of regular one to keep stats etc correct + _mi_free_delayed_block(page,block); + } + else { + // we might already start delayed freeing while another thread has not yet + // reset the flag; in that case delay it further :-( + mi_block_t* dfree; + do { + dfree = (mi_block_t*)heap->thread_delayed_free; + mi_block_set_nextx(heap->cookie, block, dfree); + } while (!mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, block, dfree)); + + } block = next; } } @@ -334,11 +351,9 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_internal(mi_page_all_free(page)); #if MI_DEBUG>1 // check if we can safely free - mi_thread_free_t free; - free.value = page->thread_free.value; - free.delayed = MI_NEVER_DELAYED_FREE; - free.value = mi_atomic_exchange(&page->thread_free.value, free.value); - mi_assert_internal(free.delayed != MI_DELAYED_FREEING); + mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE); + free = mi_atomic_exchange(&page->thread_free, free); + mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING); #endif page->flags.has_aligned = false; @@ -538,7 +553,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi 
mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); - mi_assert_internal(page->thread_free.value == 0); + mi_assert_internal(page->thread_free == 0); mi_assert_internal(page->thread_freed == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL);