From eb25093b13b57cb83113527b7df47fcfb1a427c3 Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Wed, 21 Aug 2019 09:40:57 -0700
Subject: [PATCH 1/4] fix mi_cdecl for older clang versions

---
 include/mimalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index 6615e2e2..9f27e463 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -53,8 +53,8 @@ terms of the MIT license. A copy of the license can be found in the file
 #else
   #define mi_attr_alloc_size(s)      __attribute__((alloc_size(s)))
   #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
-  #define mi_cdecl // leads to warnings... __attribute__((cdecl))
 #endif
+  #define mi_cdecl // leads to warnings... __attribute__((cdecl))
 #else
   #define mi_decl_thread  __thread
   #define mi_decl_export

From 25ea9cf142548a65c5109798706bcc872886d93b Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 21:38:45 -0700
Subject: [PATCH 2/4] on windows use 4TiB area for aligned allocation

---
 src/os.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/os.c b/src/os.c
index 7afe447e..e7ed57b5 100644
--- a/src/os.c
+++ b/src/os.c
@@ -184,6 +184,18 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 
 #ifdef _WIN32
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
+#if (MI_INTPTR_SIZE >= 8)
+  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
+  static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
+  if (addr == NULL && try_alignment > 0 &&
+      try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0)
+  {
+    intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
+    if (hint%try_alignment == 0) {
+      return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
+    }
+  }
+#endif
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
     // on modern Windows try use VirtualAlloc2 for aligned allocation
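The hint logic in patch 2 is easy to experiment with in isolation. Below is a
minimal standalone sketch of the same scheme in portable C11; the names and
constants are stand-ins, not mimalloc's. Note that mi_atomic_add, as used in
the patch, returns the new value (hence the `- size` above), whereas C11
atomic_fetch_add returns the old value, so the sketch needs no subtraction.

  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  _Static_assert(sizeof(void*) >= 8, "sketch assumes a 64-bit target, as the patch does");

  #define ALIGN_SIZE ((uintptr_t)1 << 22)  // 4MiB, standing in for MI_SEGMENT_SIZE

  // bump allocator for address *hints*, starting at 4TiB as in the patch
  static _Atomic uintptr_t aligned_base = (uintptr_t)4 << 40;

  // returns an aligned address hint, or 0 if the request does not fit the scheme
  static uintptr_t next_aligned_hint(size_t size) {
    if (size == 0 || (size % ALIGN_SIZE) != 0) return 0;
    uintptr_t hint = atomic_fetch_add(&aligned_base, size); // old value = start of our range
    if ((hint % ALIGN_SIZE) != 0) return 0; // defensive; stays aligned while all sizes are multiples
    return hint; // a caller would pass this to VirtualAlloc/mmap and fall back on failure
  }

  int main(void) {
    for (size_t i = 1; i <= 3; i++) {
      printf("hint for %zu segment(s): 0x%jx\n", i, (uintmax_t)next_aligned_hint(i * ALIGN_SIZE));
    }
    return 0;
  }

Because every carve-out is a multiple of the alignment and the base starts
aligned, concurrent callers each get a distinct, aligned range; a failed
VirtualAlloc at the hint simply falls through to the normal unaligned path.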
From 15552eba790e7a7e6d8477236c7c51fdb9288ee0 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 13:44:43 -0700
Subject: [PATCH 3/4] ensure volatile declaration for abandoned_next field

---
 include/mimalloc-types.h |  2 +-
 src/page-queue.c         |  1 +
 src/segment.c            | 16 +++++++++-------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index c0778f87..dd1f05e3 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -204,7 +204,7 @@ typedef enum mi_page_kind_e {
 typedef struct mi_segment_s {
   struct mi_segment_s* next;
   struct mi_segment_s* prev;
-  struct mi_segment_s* abandoned_next;
+  volatile struct mi_segment_s* abandoned_next;
   size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t used;      // count of pages in use (`used <= capacity`)
   size_t capacity;  // count of available pages (`#free + used`)
diff --git a/src/page-queue.c b/src/page-queue.c
index e476403b..859b1d57 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -130,6 +130,7 @@ extern inline uint8_t _mi_bin(size_t size) {
     // - adjust with 3 because we use do not round the first 8 sizes
     //   which each get an exact bin
     bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
+    mi_assert_internal(bin < MI_BIN_HUGE);
   }
   mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
   return bin;
diff --git a/src/segment.c b/src/segment.c
index d5a2288a..6379b24a 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -558,13 +558,15 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
   // all pages in the segment are abandoned; add it to the abandoned list
-  segment->thread_id = 0;
-  do {
-    segment->abandoned_next = (mi_segment_t*)abandoned;
-  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, segment->abandoned_next));
-  mi_atomic_increment(&abandoned_count);
-  _mi_stat_increase(&tld->stats->segments_abandoned,1);
+  _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
+  segment->thread_id = 0;
+  mi_segment_t* next;
+  do {
+    next = (mi_segment_t*)abandoned;
+    mi_atomic_write_ptr((volatile void**)&segment->abandoned_next, next);
+  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, next));
+  mi_atomic_increment(&abandoned_count);
 }
 
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@@ -598,7 +600,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
   mi_segment_t* segment;
   do {
     segment = (mi_segment_t*)abandoned;
-  } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment->abandoned_next, segment));
+  } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, (mi_segment_t*)segment->abandoned_next, segment));
   if (segment==NULL) break; // stop early if no more segments available
   // got it.
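The rewritten abandon loop in patch 3 is a textbook lock-free list push: fully
initialize the node's link, then publish the node with a compare-and-swap,
retrying on contention. The fix makes the write to abandoned_next an explicit
atomic store before the CAS can make it visible to other threads. Below is a
minimal standalone sketch of the same protocol using C11 <stdatomic.h> instead
of volatile plus mimalloc's mi_atomic_* wrappers; names are illustrative, and
ABA hazards are ignored, as in any bare Treiber stack.

  #include <stdatomic.h>
  #include <stddef.h>
  #include <stdio.h>

  typedef struct node_s {
    struct node_s* next;  // written only by the pushing thread, pre-publish
    int id;
  } node_t;

  static _Atomic(node_t*) top = NULL;

  static void push(node_t* n) {
    node_t* old;
    do {
      old = atomic_load(&top);
      n->next = old;                                          // write the link first...
    } while (!atomic_compare_exchange_weak(&top, &old, n));   // ...then publish
  }

  static node_t* pop(void) {
    node_t* n;
    do {
      n = atomic_load(&top);
      if (n == NULL) return NULL;
    } while (!atomic_compare_exchange_weak(&top, &n, n->next));
    return n;
  }

  int main(void) {
    node_t a = { NULL, 1 }, b = { NULL, 2 };
    push(&a); push(&b);
    for (node_t* n; (n = pop()) != NULL; )
      printf("popped %d\n", n->id);  // LIFO: 2 then 1
    return 0;
  }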
From 6c6fcad242ebedba6ee07cff2d255457eb811bb8 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 14:08:00 -0700
Subject: [PATCH 4/4] remove threadid from pages and keep page flags separate
 (cherry picked)

---
 include/mimalloc-internal.h | 25 +++++--------------------
 include/mimalloc-types.h    | 30 ++++++++++++++++--------------
 src/alloc.c                 | 10 +++++-----
 src/init.c                  |  9 +++++----
 src/page.c                  |  1 -
 src/segment.c               | 18 +++++-------------
 6 files changed, 36 insertions(+), 57 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 6455d57e..d886bcec 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -315,39 +315,24 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
 }
 
+
 //-----------------------------------------------------------
 // Page flags
 //-----------------------------------------------------------
-static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
-  return (page->flags & ~MI_PAGE_FLAGS_MASK);
-}
-
-static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id;
-}
-
-static inline void mi_page_set_thread_id(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id | (page->flags & MI_PAGE_FLAGS_MASK);
-}
-
 static inline bool mi_page_is_in_full(const mi_page_t* page) {
-  return ((page->flags & 0x01) != 0);
+  return page->flags.in_full;
 }
 
 static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
-  if (in_full) page->flags |= 0x01;
-  else page->flags &= ~0x01;
+  page->flags.in_full = in_full;
 }
 
 static inline bool mi_page_has_aligned(const mi_page_t* page) {
-  return ((page->flags & 0x02) != 0);
+  return page->flags.has_aligned;
 }
 
 static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
-  if (has_aligned) page->flags |= 0x02;
-  else page->flags &= ~0x02;
+  page->flags.has_aligned = has_aligned;
 }
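These accessors rely on the new mi_page_flags_t union (introduced in the
mimalloc-types.h hunk that follows): each flag is an ordinary bool, yet "is
any flag set?" collapses to a single 16-bit compare against zero. A standalone
sketch of the trick, including the layout assumption it depends on:

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  typedef union page_flags_u {   // mirrors the patch's mi_page_flags_t
    uint16_t value;
    struct {                     // C11 anonymous struct
      bool in_full;
      bool has_aligned;
    };
  } page_flags_t;

  // the trick relies on both bools fitting exactly inside the 16-bit value
  _Static_assert(sizeof(bool) == 1 && sizeof(page_flags_t) == 2, "layout assumption");

  int main(void) {
    page_flags_t f = { 0 };
    printf("any flag set: %s\n", f.value != 0 ? "yes" : "no");  // no
    f.has_aligned = true;
    // one integer compare tests both flags at once, as the new mi_free does
    printf("any flag set: %s\n", f.value != 0 ? "yes" : "no");  // yes
    return 0;
  }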
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index dd1f05e3..c20b663a 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -124,12 +124,15 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;
 
 
-// Use the bottom 2 bits for the `in_full` and `has_aligned` flags
-// and the rest for the threadid (we assume tid's never use those lower 2 bits).
-// This allows a single test in `mi_free` to check for unlikely cases
-// (namely, non-local free, aligned free, or freeing in a full page)
-#define MI_PAGE_FLAGS_MASK ((uintptr_t)0x03)
-typedef uintptr_t mi_page_flags_t;
+// The `in_full` and `has_aligned` page flags are put in a union to efficiently
+// test if both are false (`value == 0`) in the `mi_free` routine.
+typedef union mi_page_flags_u {
+  uint16_t value;
+  struct {
+    bool in_full;
+    bool has_aligned;
+  };
+} mi_page_flags_t;
 
 // Thread free list.
 // We use the bottom 2 bits of the pointer for mi_delayed_t flags
@@ -163,12 +166,12 @@ typedef struct mi_page_s {
   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t        capacity;  // number of blocks committed
   uint16_t        reserved;  // number of blocks reserved in memory
-  // 16 bits padding
+  mi_page_flags_t flags;     // `in_full` and `has_aligned` flags (16 bits)
+
   mi_block_t*     free;      // list of available free blocks (`malloc` allocates from this list)
   #if MI_SECURE
   uintptr_t       cookie;    // random cookie to encode the free lists
   #endif
-  mi_page_flags_t flags;     // threadid:62 | has_aligned:1 | in_full:1
   size_t          used;      // number of blocks in use (including blocks in `local_free` and `thread_free`)
 
   mi_block_t*     local_free; // list of deferred free blocks by this thread (migrates to `free`)
@@ -181,12 +184,11 @@ typedef struct mi_page_s {
   struct mi_page_s* next;    // next page owned by this thread with the same `block_size`
   struct mi_page_s* prev;    // previous page owned by this thread with the same `block_size`
 
-// improve page index calculation
-#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  void* padding[1];          // 12 words on 64-bit
-#elif MI_INTPTR_SIZE==4
-  // void* padding[1];       // 12 words on 32-bit
-#endif
+  // improve page index calculation
+  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  void* padding[1];          // 12 words on 64-bit in secure mode, 12 words on 32-bit plain
+  #endif
 } mi_page_t;
diff --git a/src/alloc.c b/src/alloc.c
index 9be2ef40..b7881ea5 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -225,19 +225,19 @@ void mi_free(void* p) mi_attr_noexcept
   }
 #endif
 
+  const uintptr_t tid = _mi_thread_id();
   mi_page_t* const page = _mi_segment_page_of(segment, p);
 
 #if (MI_STAT>1)
   mi_heap_t* heap = mi_heap_get_default();
-  mi_heap_stat_decrease( heap, malloc, mi_usable_size(p));
+  mi_heap_stat_decrease(heap, malloc, mi_usable_size(p));
   if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) {
-    mi_heap_stat_decrease( heap, normal[_mi_bin(page->block_size)], 1);
+    mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1);
   }
   // huge page stat is accounted for in `_mi_page_retire`
 #endif
 
-  const uintptr_t tid = _mi_thread_id();
-  if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks
+  if (mi_likely(tid == segment->thread_id && page->flags.value == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
     mi_block_set_next(page, block, page->local_free);
@@ -247,7 +247,7 @@ void mi_free(void* p) mi_attr_noexcept
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
    mi_free_generic(segment, page, tid == segment->thread_id, p);
   }
 }
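With the thread id moved off the page, the mi_free fast path becomes two cheap
compares: the freeing thread must own the segment, and the page's flag word
must be zero. A condensed sketch of that test with stand-in types, not
mimalloc's real declarations:

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  typedef union { uint16_t value; struct { bool in_full; bool has_aligned; }; } page_flags_t;
  typedef struct { page_flags_t flags; } page_t;      // stand-in for mi_page_t
  typedef struct { uintptr_t thread_id; } segment_t;  // stand-in for mi_segment_t

  // new test: owner matches AND no flag is set -> take the local fast path;
  // the old code folded both into one compare against a packed tid+flags word
  static bool free_fast_path(const segment_t* seg, const page_t* page, uintptr_t tid) {
    return tid == seg->thread_id && page->flags.value == 0;
  }

  int main(void) {
    segment_t seg  = { .thread_id = 42 };
    page_t    page = { .flags = { .value = 0 } };
    printf("%d\n", free_fast_path(&seg, &page, 42));  // 1: local fast path
    page.flags.in_full = true;
    printf("%d\n", free_fast_path(&seg, &page, 42));  // 0: generic path
    return 0;
  }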
diff --git a/src/init.c b/src/init.c
index ec64def8..76e586f2 100644
--- a/src/init.c
+++ b/src/init.c
@@ -13,15 +13,16 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
   0, false, false, false, 0, 0,
+  { 0 },
   NULL,     // free
   #if MI_SECURE
   0,
   #endif
-  0, 0,     // flags, used
+  0,        // used
   NULL, 0, 0,
   0, NULL, NULL, NULL
-  #if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  , { NULL }
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  , { NULL }  // padding
   #endif
 };
@@ -350,7 +351,7 @@ void mi_thread_init(void) mi_attr_noexcept
   pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called
   #endif
 
-  #if (MI_DEBUG>0) // not in release mode as that leads to crashes on Windows dynamic override
+  #if (MI_DEBUG>0) && !defined(NDEBUG)  // not in release mode as that leads to crashes on Windows dynamic override
   _mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
   #endif
 }
diff --git a/src/page.c b/src/page.c
index 549ced38..a7b4a760 100644
--- a/src/page.c
+++ b/src/page.c
@@ -75,7 +75,6 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_segment_t* segment = _mi_page_segment(page);
   uint8_t* start = _mi_page_start(segment,page,NULL);
   mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
-  mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page));
   //mi_assert_internal(start + page->capacity*page->block_size == page->top);
 
   mi_assert_internal(mi_page_list_is_valid(page,page->free));
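A side note on the _mi_page_empty hunk: the initializer is positional, so
every layout change (such as this patch's) must track the struct fields and
their #if arms by hand. C99 designated initializers avoid that coupling; a
sketch with a simplified stand-in struct, not mimalloc's real mi_page_t:

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>

  typedef union {                // as in the patch
    uint16_t value;
    struct { bool in_full; bool has_aligned; };
  } page_flags_t;

  typedef struct page_s {        // simplified stand-in for mi_page_t
    uint16_t     capacity;
    uint16_t     reserved;
    page_flags_t flags;
    void*        free;
    size_t       used;
  } page_t;

  // fields are named, so reordering or inserting members cannot silently
  // shift which value lands in which field, unlike the positional form above
  static const page_t page_empty = {
    .capacity = 0,
    .reserved = 0,
    .flags    = { .value = 0 },
    .free     = NULL,
    .used     = 0,
  };

  int main(void) { return (int)page_empty.used; }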
diff --git a/src/segment.c b/src/segment.c
index 6379b24a..b1a5221c 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -549,14 +549,11 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->used > 0);
   mi_assert_internal(segment->abandoned_next == NULL);
   mi_assert_expensive(mi_segment_is_valid(segment));
-#if MI_DEBUG>1
-  for (size_t i = 0; i < segment->capacity; i++) {
-    mi_assert_internal(!segment->pages[i].segment_in_use || mi_page_thread_id(&segment->pages[i]) == 0);
-  }
-#endif
+
   // remove the segment from the free page queue if needed
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
+
   // all pages in the segment are abandoned; add it to the abandoned list
   _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
@@ -570,11 +567,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
 }
 
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
-  mi_assert(page != NULL && mi_page_thread_id(page) != 0);
+  mi_assert(page != NULL);
   mi_segment_t* segment = _mi_page_segment(page);
   mi_assert_expensive(mi_segment_is_valid(segment));
-  segment->abandoned++;
-  mi_page_set_thread_id(page, 0);
+  segment->abandoned++;
   _mi_stat_increase(&tld->stats->pages_abandoned, 1);
   mi_assert_internal(segment->abandoned <= segment->used);
   if (segment->used == segment->abandoned) {
@@ -626,7 +622,6 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
       }
       else {
         // otherwise reclaim it
-        mi_page_set_thread_id(page,segment->thread_id);
         _mi_page_reclaim(heap,page);
       }
     }
@@ -656,8 +651,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
 static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(mi_segment_has_free(segment));
   mi_page_t* page = mi_segment_find_free(segment, tld->stats);
-  page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
+  page->segment_in_use = true;
   segment->used++;
   mi_assert_internal(segment->used <= segment->capacity);
   if (segment->used == segment->capacity) {
@@ -697,7 +691,6 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
 
@@ -709,7 +702,6 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
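The per-page thread id can be dropped because a page's owner is recoverable
from its segment: segments are MI_SEGMENT_SIZE (4MiB) aligned, which patch 2
helps guarantee, so masking the low bits of any block pointer yields the
owning segment and its single thread_id field. A standalone sketch of that
mapping; the constants mirror mimalloc's scheme but the struct is a stand-in:

  #include <stdint.h>
  #include <stdio.h>

  #define SEGMENT_SIZE ((uintptr_t)1 << 22)  // 4MiB, as MI_SEGMENT_SIZE
  #define SEGMENT_MASK (SEGMENT_SIZE - 1)

  typedef struct segment_s {
    uintptr_t thread_id;  // one owner field serves every page in the segment
  } segment_t;

  // map any interior pointer to its segment by alignment masking,
  // analogous to mimalloc's _mi_ptr_segment
  static segment_t* segment_of(const void* p) {
    return (segment_t*)((uintptr_t)p & ~SEGMENT_MASK);
  }

  int main(void) {
    // a made-up segment-aligned address and a block inside it (never dereferenced)
    uintptr_t base = SEGMENT_SIZE * 123;
    const void* block = (const void*)(base + 12345);
    printf("segment of block: 0x%jx\n", (uintmax_t)(uintptr_t)segment_of(block));
    return 0;
  }

This is also why the free fast path in patch 4 can compare against
segment->thread_id: the segment lookup is a mask, not a table walk.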