From 6596e970a52d28a5e449e9765c33f88787a96bb5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 8 Aug 2019 15:23:18 -0700 Subject: [PATCH] move in_full and has_aligned into page threadid for a single test in mi_free --- include/mimalloc-internal.h | 4 ++++ include/mimalloc-types.h | 32 ++++++++++++++++++++++---------- src/alloc.c | 25 +++++++++---------------- src/init.c | 9 +++++---- src/page.c | 7 ++++--- src/segment.c | 8 ++++++-- 6 files changed, 50 insertions(+), 35 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cc487a21..2c3ccffd 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -314,6 +314,10 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; } +static inline uintptr_t mi_page_thread_id(const mi_page_t* page) { + return (page->flags.padding << MI_PAGE_FLAGS_BITS); +} + // ------------------------------------------------------------------- // Encoding/Decoding the free list next pointers // ------------------------------------------------------------------- diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 6c094091..073d23d3 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -123,14 +123,26 @@ typedef enum mi_delayed_e { } mi_delayed_t; +// Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags +// This allows a single test in `mi_free` to check for unlikely cases +// (namely, non-local free, aligned free, or freeing in a full page) +#define MI_PAGE_FLAGS_BITS (2) typedef union mi_page_flags_u { - uint16_t value; + uintptr_t threadidx; struct { - bool has_aligned; - bool in_full; + #ifdef MI_BIG_ENDIAN + uintptr_t padding : (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS); + uintptr_t in_full : 1; + uintptr_t has_aligned : 1; + #else + uintptr_t in_full : 1; + uintptr_t has_aligned : 1; + uintptr_t padding : (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS); + #endif }; } mi_page_flags_t; + // Thread free list. 
// We use the bottom 2 bits of the pointer for mi_delayed_t flags typedef uintptr_t mi_thread_free_t; @@ -160,16 +172,16 @@ typedef struct mi_page_s { bool is_reset:1; // `true` if the page memory was reset bool is_committed:1; // `true` if the page virtual memory is committed - // layout like this to optimize access in `mi_malloc` and `mi_free` - mi_page_flags_t flags; + // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed uint16_t reserved; // number of blocks reserved in memory - + // 16 bits padding mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #if MI_SECURE uintptr_t cookie; // random cookie to encode the free lists #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1 mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free` @@ -182,10 +194,10 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation -#if MI_INTPTR_SIZE==8 - //void* padding[1]; // 12 words on 64-bit +#if (MI_INTPTR_SIZE==8 && MI_SECURE==0) + void* padding[1]; // 12 words on 64-bit #elif MI_INTPTR_SIZE==4 - void* padding[1]; // 12 words on 32-bit + // void* padding[1]; // 12 words on 32-bit #endif } mi_page_t; @@ -215,7 +227,7 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
- uintptr_t thread_id; // unique id of the thread owning this segment + volatile uintptr_t thread_id; // unique id of the thread owning this segment mi_page_kind_t page_kind; // kind of pages: small, large, or huge mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; diff --git a/src/alloc.c b/src/alloc.c index 6a91c0ad..f23ed896 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -223,8 +223,7 @@ void mi_free(void* p) mi_attr_noexcept return; } #endif - - bool local = (_mi_thread_id() == segment->thread_id); // preload, note: putting the thread_id in the page->flags does not improve performance + mi_page_t* page = _mi_segment_page_of(segment, p); #if (MI_STAT>1) @@ -237,23 +236,17 @@ void mi_free(void* p) mi_attr_noexcept #endif // adjust if it might be an un-aligned block - if (mi_likely(page->flags.value==0)) { // not full or aligned + uintptr_t tid = _mi_thread_id(); + if (mi_likely(tid == page->flags.threadidx)) { // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance - // owning thread can free a block directly - mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance - page->local_free = block; - page->used--; - if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } - } - else { - // use atomic operations for a multi-threaded free - _mi_free_block_mt(page, block); - } + mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance + page->local_free = block; + page->used--; + if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } } else { - // aligned blocks, or a full page; use the more generic path - mi_free_generic(segment, page, local, p); + // non-local, aligned blocks, or a full page; use the more generic path + mi_free_generic(segment, page, tid == mi_page_thread_id(page), p); } } diff --git a/src/init.c b/src/init.c index 3b060fa4..13ed9561 100644 --- a/src/init.c +++ b/src/init.c @@ -12,15 +12,16 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, {0}, 0, 0, - NULL, 0, // free, used + 0, false, false, false, 0, 0, + NULL, // free #if MI_SECURE 0, #endif + 0, {0}, // used, flags NULL, 0, 0, 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) - , { NULL } + #if (MI_INTPTR_SIZE==8 && MI_SECURE==0) + , { NULL } #endif }; diff --git a/src/page.c b/src/page.c index aa8a8415..4ff797c0 100644 --- a/src/page.c +++ b/src/page.c @@ -71,10 +71,11 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->block_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); - + mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(segment->thread_id == mi_page_thread_id(page)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -458,7 +459,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si heap->random = _mi_random_shuffle(rnd); } -static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) +static void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats) { UNUSED(stats); mi_assert_internal(page->free == NULL); @@ -524,7 +525,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st // and append the extend the free list if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(heap, page, extend, stats ); + mi_page_free_list_extend(page, extend, stats ); } else { mi_page_free_list_extend_secure(heap, page, extend, stats); diff --git a/src/segment.c b/src/segment.c index a86c3bc0..256c30eb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -226,6 +226,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { + segment->thread_id = 0; mi_segments_track_size(-((long)segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set @@ -412,8 +413,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - segment->thread_id = 0; + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); // update reset memory statistics /* @@ -618,6 +618,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } else { // otherwise reclaim it + page->flags.threadidx = segment->thread_id; _mi_page_reclaim(heap,page); } } @@ -648,6 +649,7 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld->stats); page->segment_in_use = true; + page->flags.threadidx = segment->thread_id; segment->used++; mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { @@ 
-687,6 +689,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_ segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; + page->flags.threadidx = segment->thread_id; return page; } @@ -698,6 +701,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; + page->flags.threadidx = segment->thread_id; return page; }
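
The trick this patch implements, in isolation: mimalloc's thread id is effectively a pointer-aligned thread-local address, so its two lowest bits are always zero and can carry the `in_full` and `has_aligned` flags. The fast path of `mi_free` then answers "local free AND page not full AND no aligned blocks" with a single word compare. Below is a minimal, self-contained C sketch of the idea under that alignment assumption; every `demo_*` name is invented for illustration and none of this is mimalloc's actual code.

/* A minimal, self-contained sketch of the technique in this patch -- not
   mimalloc's actual code; every demo_* name is invented for illustration. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_FLAGS_BITS 2

typedef struct demo_page_s {
  // thread id of the owning thread in the upper bits,
  // in_full (bit 0) and has_aligned (bit 1) in the lower two bits
  uintptr_t threadidx;
} demo_page_t;

// A pointer-aligned per-thread address serves as the thread id: because the
// variable is at least 4-byte aligned, its two lowest bits are always zero.
static uintptr_t demo_thread_id(void) {
  static _Thread_local int tls_anchor;
  return (uintptr_t)&tls_anchor;
}

// Recover the owning thread id by clearing the flag bits
// (the patch does the same via `flags.padding << MI_PAGE_FLAGS_BITS`).
static uintptr_t demo_page_thread_id(const demo_page_t* page) {
  return (page->threadidx >> DEMO_FLAGS_BITS) << DEMO_FLAGS_BITS;
}

static void demo_free(demo_page_t* page) {
  uintptr_t tid = demo_thread_id();
  if (tid == page->threadidx) {
    // Fast path: one compare proved all three conditions at once --
    // local free, page not full, and no aligned blocks on the page.
    puts("fast local free");
  }
  else {
    // Generic path: non-local free, a full page, or aligned blocks.
    bool local = (tid == demo_page_thread_id(page));
    printf("generic free (local=%d)\n", local);
  }
}

int main(void) {
  demo_page_t page = { .threadidx = demo_thread_id() }; // owned, flags clear
  demo_free(&page);              // takes the fast path

  page.threadidx |= 1;           // mark the page as in_full
  demo_free(&page);              // same thread, but now the generic path
  return 0;
}

The union in the patch serves the same purpose as the shift/mask above: `flags.threadidx` views the whole word for the one-compare fast path, while the `in_full`/`has_aligned`/`padding` bitfields alias that same word so the rest of the code can still set or clear an individual flag.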