diff --git a/CMakeLists.txt b/CMakeLists.txt
index e6026004..c9de8618 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -163,10 +163,12 @@ target_include_directories(mimalloc-obj PUBLIC
   $<INSTALL_INTERFACE:${mi_install_dir}/include>
 )
 
-# seems to lead to cmake warnings/errors on some systems, disable for now :-(
+# the following seems to lead to cmake warnings/errors on some systems, disable for now :-(
 # install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_dir})
 
-install(FILES $<TARGET_OBJECTS:mimalloc-obj>
+# the FILES expression can also be: $<TARGET_OBJECTS:mimalloc-obj>
+# but that fails on cmake versions less than 3.10 so we leave it as is for now
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}
         DESTINATION ${mi_install_dir}
         RENAME ${mi_basename}${CMAKE_C_OUTPUT_EXTENSION} )
 
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 3aa0d48a..13d777ef 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -95,7 +95,7 @@ void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_at
 void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero);
 void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero);
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
-void _mi_free_delayed_block(mi_block_t* block);
+bool _mi_free_delayed_block(mi_block_t* block);
 
 #if MI_DEBUG>1
 bool _mi_page_is_valid(mi_page_t* page);
@@ -234,6 +234,23 @@ static inline mi_page_t* _mi_ptr_page(void* p) {
   return _mi_segment_page_of(_mi_ptr_segment(p), p);
 }
 
+// Thread free access
+static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
+  return (mi_block_t*)(tf & ~0x03);
+}
+static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
+  return (mi_delayed_t)(tf & 0x03);
+}
+static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
+  return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed);
+}
+static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
+  return mi_tf_make(mi_tf_block(tf),delayed);
+}
+static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
+  return mi_tf_make(block, mi_tf_delayed(tf));
+}
+
 // are all blocks in a page freed?
 static inline bool mi_page_all_free(const mi_page_t* page) {
   mi_assert_internal(page != NULL);
@@ -248,7 +265,7 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) {
 
 // are there free blocks in this page?
 static inline bool mi_page_has_free(mi_page_t* page) {
   mi_assert_internal(page != NULL);
-  bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (page->thread_free.head != 0));
+  bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL));
   mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity);
   return hasfree;
 }
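
The new `mi_tf_*` helpers pack a `mi_delayed_t` state into the bottom two bits of the list head, which is only sound because blocks are at least 4-byte aligned. Below is a minimal standalone sketch of the same round-trip; the `delayed_t` names, values, and comments are illustrative stand-ins for mimalloc's `mi_delayed_t`, following the ordering the patch relies on in comparisons like `mi_tf_delayed(tfree) < MI_DELAYED_FREEING`:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef enum delayed_e {
      NO_DELAYED_FREE    = 0,  // push frees directly on the page's thread-free list
      USE_DELAYED_FREE   = 1,  // push frees on the owning heap's delayed list
      DELAYED_FREEING    = 2,  // a delayed push is currently in progress
      NEVER_DELAYED_FREE = 3   // sticky: never use delayed free for this page
    } delayed_t;

    typedef uintptr_t thread_free_t;

    static thread_free_t tf_make(void* block, delayed_t delayed) {
      assert(((uintptr_t)block & 0x03) == 0);  // low 2 bits must be free for the tag
      return (thread_free_t)((uintptr_t)block | (uintptr_t)delayed);
    }
    static void* tf_block(thread_free_t tf)       { return (void*)(tf & ~(uintptr_t)0x03); }
    static delayed_t tf_delayed(thread_free_t tf) { return (delayed_t)(tf & 0x03); }

    int main(void) {
      uint64_t blk;  // stands in for a block; at least 4-byte aligned
      thread_free_t tf = tf_make(&blk, DELAYED_FREEING);
      assert(tf_block(tf) == (void*)&blk);        // the pointer survives the round-trip
      assert(tf_delayed(tf) == DELAYED_FREEING);  // and so does the 2-bit state
      printf("tagged-pointer round-trip ok\n");
      return 0;
    }
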
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 046c1a14..d591ff86 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -133,20 +133,9 @@ typedef union mi_page_flags_u {
 } mi_page_flags_t;
 
 // Thread free list.
-// We use 2 bits of the pointer for the `use_delayed_free` and `delayed_freeing` flags.
-typedef union mi_thread_free_u {
-  volatile uintptr_t value;
-  struct {
-    uintptr_t delayed:2;
-#if MI_INTPTR_SIZE==8
-    uintptr_t head:62;   // head free block in the list (right-shifted by 2)
-#elif MI_INTPTR_SIZE==4
-    uintptr_t head:30;
-#endif
-  };
-} mi_thread_free_t;
+// We use the bottom 2 bits of the pointer for mi_delayed_t flags
+typedef uintptr_t mi_thread_free_t;
 
-#define MI_TF_PTR_SHIFT (2)
 
 // A page contains blocks of one specific size (`block_size`).
 // Each page has three list of free blocks:
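
Dropping the bitfield union for a bare `uintptr_t` also simplifies the atomics: every compare-and-swap now plainly operates on a whole word, with no implementation-defined bitfield layout in between, and `MI_TF_PTR_SHIFT` becomes unnecessary. A rough C11 model of the push loop from `_mi_free_block_mt` under this representation (illustrative names; `atomic_compare_exchange_weak` stands in for mimalloc's own atomic wrapper):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uintptr_t thread_free_t;
    typedef struct block_s { struct block_s* next; } block_t;

    static block_t* tf_block(thread_free_t tf) { return (block_t*)(tf & ~(uintptr_t)0x03); }
    static thread_free_t tf_set_block(thread_free_t tf, block_t* b) {
      return (thread_free_t)((uintptr_t)b | (tf & 0x03));  // new head, same 2-bit state
    }

    // Lock-free push of `block` onto a tagged thread-free list head.
    static void thread_free_push(_Atomic thread_free_t* tfp, block_t* block) {
      thread_free_t tfree = atomic_load(tfp);
      thread_free_t tfreex;
      do {
        block->next = tf_block(tfree);        // link to the current head
        tfreex = tf_set_block(tfree, block);  // keep the delayed-state bits intact
      } while (!atomic_compare_exchange_weak(tfp, &tfree, tfreex));
    }

    int main(void) {
      static _Atomic thread_free_t list = 0;
      block_t a = {0}, b = {0};
      thread_free_push(&list, &a);
      thread_free_push(&list, &b);
      printf("head is b: %d\n", tf_block(atomic_load(&list)) == &b);
      return 0;
    }
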
diff --git a/src/alloc.c b/src/alloc.c
index 1a7c6da6..b1006658 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -109,25 +109,25 @@ void* mi_zalloc(size_t size) mi_attr_noexcept {
 
 // multi-threaded free
 static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
 {
-  mi_thread_free_t tfree = {0};
-  mi_thread_free_t tfreex = {0};
+  mi_thread_free_t tfree;
+  mi_thread_free_t tfreex;
   bool use_delayed;
   do {
-    tfreex.value = tfree.value = page->thread_free.value;
-    use_delayed = (tfree.delayed == MI_USE_DELAYED_FREE ||
-                   (tfree.delayed == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1)
+    tfree = page->thread_free;
+    use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE ||
+                   (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1)
                   );
     if (mi_unlikely(use_delayed)) {
       // unlikely: this only happens on the first concurrent free in a page that is in the full list
-      tfreex.delayed = MI_DELAYED_FREEING;
+      tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
     }
     else {
       // usual: directly add to page thread_free list
-      mi_block_set_next(page, block, (mi_block_t*)((uintptr_t)tfree.head << MI_TF_PTR_SHIFT));
-      tfreex.head = (uintptr_t)block >> MI_TF_PTR_SHIFT;
+      mi_block_set_next(page, block, mi_tf_block(tfree));
+      tfreex = mi_tf_set_block(tfree,block);
     }
-  } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
+  } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree));
 
   if (mi_likely(!use_delayed)) {
     // increment the thread free count and return
@@ -148,10 +148,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
     // and reset the MI_DELAYED_FREEING flag
     do {
-      tfreex.value = tfree.value = page->thread_free.value;
-      mi_assert_internal(tfree.delayed == MI_NEVER_DELAYED_FREE || tfree.delayed == MI_DELAYED_FREEING);
-      if (tfree.delayed != MI_NEVER_DELAYED_FREE) tfreex.delayed = MI_NO_DELAYED_FREE;
-    } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
+      tfreex = tfree = page->thread_free;
+      mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
+      if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
+    } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree));
   }
 }
 
@@ -252,13 +252,20 @@ void mi_free(void* p) mi_attr_noexcept
   }
 }
 
-void _mi_free_delayed_block(mi_block_t* block) {
-  mi_assert_internal(block != NULL);
+bool _mi_free_delayed_block(mi_block_t* block) {
+  // get segment and page
   const mi_segment_t* segment = _mi_ptr_segment(block);
   mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
   mi_assert_internal(_mi_thread_id() == segment->thread_id);
-  mi_page_t* page = _mi_segment_page_of(segment,block);
+  mi_page_t* page = _mi_segment_page_of(segment, block);
+  if (mi_tf_delayed(page->thread_free) == MI_DELAYED_FREEING) {
+    // we might already start delayed freeing while another thread has not yet
+    // reset the delayed_freeing flag; in that case don't free it quite yet if
+    // this is the last block remaining.
+    if (page->used - page->thread_freed == 1) return false;
+  }
   _mi_free_block(page,true,block);
+  return true;
 }
 
 // Bytes available in a block
diff --git a/src/init.c b/src/init.c
index 54083969..8ab520eb 100644
--- a/src/init.c
+++ b/src/init.c
@@ -14,7 +14,7 @@ const mi_page_t _mi_page_empty = {
   0, false, false, false, {0}, 0, 0,
   NULL, 0, 0,   // free, used, cookie
-  NULL, 0, {0},
+  NULL, 0, 0,
   0, NULL, NULL, NULL
   #if (MI_INTPTR_SIZE==4)
   , { NULL }
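
Note the argument order in these loops: from the call sites, `mi_atomic_compare_exchange(p, exchange, compare)` takes the new value before the expected one, the reverse of C11's `atomic_compare_exchange_*`. A model of that assumed contract in C11 terms (mimalloc's real implementation lives in its atomic header and may differ):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    // Model: store `exchange` iff *p still equals `compare`; true on success.
    // Mirrors how the patch calls it: mi_atomic_compare_exchange(&tf, tfreex, tfree)
    // retries while the swap from the observed `tfree` to the new `tfreex` fails.
    static bool model_compare_exchange(_Atomic uintptr_t* p,
                                       uintptr_t exchange, uintptr_t compare) {
      return atomic_compare_exchange_strong(p, &compare, exchange);
    }

    int main(void) {
      _Atomic uintptr_t v = 1;
      return (model_compare_exchange(&v, 2, 1) && v == 2) ? 0 : 1;
    }
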
diff --git a/src/page.c b/src/page.c
index 1fe8a86d..685b6b4a 100644
--- a/src/page.c
+++ b/src/page.c
@@ -82,7 +82,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_assert_internal(mi_page_list_is_valid(page,page->free));
   mi_assert_internal(mi_page_list_is_valid(page,page->local_free));
 
-  mi_block_t* tfree = (mi_block_t*)((uintptr_t)page->thread_free.head << MI_TF_PTR_SHIFT);
+  mi_block_t* tfree = mi_tf_block(page->thread_free);
   mi_assert_internal(mi_page_list_is_valid(page, tfree));
   size_t tfree_count = mi_page_list_count(page, tfree);
   mi_assert_internal(tfree_count <= page->thread_freed + 1);
@@ -114,17 +114,17 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) {
   mi_thread_free_t tfreex;
 
   do {
-    tfreex.value = tfree.value = page->thread_free.value;
-    if (mi_unlikely(tfree.delayed < MI_DELAYED_FREEING)) {
-      tfreex.delayed = delay;
+    tfreex = tfree = page->thread_free;
+    if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) {
+      tfreex = mi_tf_set_delayed(tfree,delay);
     }
-    else if (mi_unlikely(tfree.delayed == MI_DELAYED_FREEING)) {
+    else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) {
       mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
       continue;          // and try again
     }
   }
-  while(tfreex.delayed != tfree.delayed && // avoid atomic operation if already equal
-        !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
+  while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal
+        !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree));
 }
 
@@ -139,13 +139,13 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) {
 static void mi_page_thread_free_collect(mi_page_t* page)
 {
   mi_block_t* head;
-  mi_thread_free_t tfree = {0};
-  mi_thread_free_t tfreex = {0};
+  mi_thread_free_t tfree;
+  mi_thread_free_t tfreex;
   do {
-    tfreex.value = tfree.value = page->thread_free.value;
-    head = (mi_block_t*)((uintptr_t)tfree.head << MI_TF_PTR_SHIFT);
-    tfreex.head = 0;
-  } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex.value, tfree.value));
+    tfreex = tfree = page->thread_free;
+    head = mi_tf_block(tfree);
+    tfreex = mi_tf_set_block(tfree,NULL);
+  } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree));
 
   // return if the list is empty
   if (head == NULL) return;
@@ -175,7 +175,7 @@ void _mi_page_free_collect(mi_page_t* page) {
   // free the local free list
   if (page->local_free != NULL) {
     if (mi_likely(page->free == NULL)) {
-      // usual caes
+      // usual case
      page->free = page->local_free;
    }
    else {
@@ -189,7 +189,7 @@ void _mi_page_free_collect(mi_page_t* page) {
     page->local_free = NULL;
   }
   // and the thread free list
-  if (page->thread_free.head != 0) { // quick test to avoid an atomic operation
+  if (mi_tf_block(page->thread_free) != NULL) { // quick test to avoid an atomic operation
     mi_page_thread_free_collect(page);
   }
 }
@@ -259,7 +259,16 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
   while(block != NULL) {
     mi_block_t* next = mi_block_nextx(heap->cookie,block);
     // use internal free instead of regular one to keep stats etc correct
-    _mi_free_delayed_block(block);
+    if (!_mi_free_delayed_block(block)) {
+      // we might already start delayed freeing while another thread has not yet
+      // reset the delayed_freeing flag; in that case delay it further by reinserting.
+      mi_block_t* dfree;
+      do {
+        dfree = (mi_block_t*)heap->thread_delayed_free;
+        mi_block_set_nextx(heap->cookie, block, dfree);
+      } while (!mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, block, dfree));
+
+    }
     block = next;
   }
 }
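
`mi_page_thread_free_collect` is the consumer of the push shown earlier: one swap detaches the whole list while leaving the two state bits in place, after which the detached chain can be walked without atomics. A C11 sketch of that pop-all step (again with illustrative names standing in for mimalloc's internals):

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef uintptr_t thread_free_t;
    typedef struct block_s { struct block_s* next; } block_t;

    static block_t* tf_block(thread_free_t tf) { return (block_t*)(tf & ~(uintptr_t)0x03); }
    static thread_free_t tf_set_block(thread_free_t tf, block_t* b) {
      return (thread_free_t)((uintptr_t)b | (tf & 0x03));
    }

    // Atomically take the whole thread-free list; the delayed-state bits stay put.
    static block_t* thread_free_pop_all(_Atomic thread_free_t* tfp) {
      thread_free_t tfree = atomic_load(tfp);
      thread_free_t tfreex;
      do {
        tfreex = tf_set_block(tfree, NULL);  // empty list, same 2-bit state
      } while (!atomic_compare_exchange_weak(tfp, &tfree, tfreex));
      return tf_block(tfree);  // detached head; safe to walk without atomics now
    }

    int main(void) {
      static _Atomic thread_free_t list = 0;
      block_t b = { NULL };
      atomic_store(&list, tf_set_block(0, &b));
      block_t* head = thread_free_pop_all(&list);
      return (head == &b && tf_block(atomic_load(&list)) == NULL) ? 0 : 1;
    }
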
@@ -334,11 +343,9 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
   mi_assert_internal(mi_page_all_free(page));
 #if MI_DEBUG>1
   // check if we can safely free
-  mi_thread_free_t free;
-  free.value = page->thread_free.value;
-  free.delayed = MI_NEVER_DELAYED_FREE;
-  free.value = mi_atomic_exchange(&page->thread_free.value, free.value);
-  mi_assert_internal(free.delayed != MI_DELAYED_FREEING);
+  mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE);
+  free = mi_atomic_exchange(&page->thread_free, free);
+  mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING);
 #endif
 
   page->flags.has_aligned = false;
@@ -377,7 +384,7 @@ void _mi_page_retire(mi_page_t* page) {
   // is the only page left with free blocks. It is not clear
   // how to check this efficiently though... for now we just check
   // if its neighbours are almost fully used.
-  if (mi_likely(page->block_size <= MI_LARGE_SIZE_MAX)) {
+  if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
    if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
      return; // dont't retire after all
    }
@@ -533,7 +540,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page->capacity == 0);
   mi_assert_internal(page->free == NULL);
   mi_assert_internal(page->used == 0);
-  mi_assert_internal(page->thread_free.value == 0);
+  mi_assert_internal(page->thread_free == 0);
   mi_assert_internal(page->thread_freed == 0);
   mi_assert_internal(page->next == NULL);
   mi_assert_internal(page->prev == NULL);
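
Not part of the patch, but a smoke test along these lines drives exactly the paths touched here (`_mi_free_block_mt` and the delayed-free handshake), since every block is freed from a different thread than the one that allocated it. It uses only mimalloc's public API:

    // build e.g.: cc test.c -lmimalloc -lpthread
    #include <mimalloc.h>
    #include <pthread.h>
    #include <stdio.h>

    #define N 10000

    static void* free_all(void* arg) {
      void** blocks = (void**)arg;
      for (int i = 0; i < N; i++) mi_free(blocks[i]);  // cross-thread frees
      return NULL;
    }

    int main(void) {
      static void* blocks[N];
      for (int i = 0; i < N; i++) blocks[i] = mi_malloc(64);
      pthread_t t;
      pthread_create(&t, NULL, free_all, blocks);
      pthread_join(t, NULL);
      printf("freed %d blocks from a second thread\n", N);
      return 0;
    }
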