From bbd81bbbd1bbe1de30b09dcfc6da22f31c4f5768 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 00:46:45 -0700 Subject: [PATCH 001/352] wip: new segment allocation with flexible large objects --- ide/vs2017/mimalloc-override.vcxproj | 5 +- ide/vs2017/mimalloc-override.vcxproj.filters | 3 - ide/vs2017/mimalloc.vcxproj | 1 - ide/vs2017/mimalloc.vcxproj.filters | 3 - include/mimalloc-internal.h | 47 +- include/mimalloc-types.h | 78 +- src/alloc-aligned.c | 2 +- src/alloc.c | 8 +- src/heap.c | 10 +- src/init.c | 21 +- src/page-queue.c | 18 +- src/page.c | 24 +- src/segment.c | 892 ++++++++++++------- src/stats.c | 8 +- test/main-override-static.c | 162 +++- 15 files changed, 843 insertions(+), 439 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 7d452b55..d9bce9c0 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -95,7 +95,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default @@ -118,7 +118,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default @@ -225,7 +225,6 @@ - diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index df0bf5ed..639a9d4e 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -58,9 +58,6 @@ Source Files - - Source Files - Source Files diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 3e453471..5b151da7 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -227,7 +227,6 @@ - true diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 28d94e99..d32080f5 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -47,9 +47,6 @@ Source Files - - Source Files - Source Files diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f6f2e2ae..f17d8af0 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -39,10 +39,20 @@ bool _mi_preloading(); // true while the C runtime is not ready // os.c size_t _mi_os_page_size(void); +size_t _mi_os_large_page_size(); void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld); + +/* // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld); void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld); @@ -55,6 +65,7 @@ bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); void 
_mi_mem_collect(mi_stats_t* stats); +*/ // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -62,7 +73,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; @@ -233,27 +244,47 @@ static inline mi_segment_t* _mi_ptr_segment(const void* p) { return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); } +static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) { + mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0); + return (mi_page_t*)(s); +} + +static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { + mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0); + return (mi_slice_t*)(p); +} + +static size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + return index; +} + // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]); + mi_assert_internal(segment == NULL || page == mi_slice_to_page(&segment->slices[mi_slice_index(mi_page_to_slice((mi_page_t*)page))])); return segment; } // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); - uintptr_t idx = (uintptr_t)diff >> segment->page_shift; - mi_assert_internal(idx < segment->capacity); - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); - return &((mi_segment_t*)segment)->pages[idx]; + uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; + mi_assert_internal(idx < segment->slice_count); + mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; + mi_slice_t* slice = slice0 - slice0->slice_offset; // adjust to the block that holds the page data + mi_assert_internal(slice->slice_count > slice0->slice_offset); + mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count); + return mi_slice_to_page(slice); } // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page->block_size, page_size); + return _mi_segment_page_start(segment, page, page_size); } // Get the page containing the pointer diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 67ad8516..7e14daca 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -74,27 +74,28 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit -#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb -#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4mb -#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb +#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb + +#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 512kb + // Derived constants -#define MI_SEGMENT_SIZE (1<>MI_INTPTR_SHIFT) -#define MI_HUGE_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` @@ -103,7 +104,7 @@ terms of the MIT license. A copy of the license can be found in the file // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) -#if (MI_LARGE_WSIZE_MAX >= 655360) +#if (MI_MEDIUM_WSIZE_MAX >= 655360) #error "define more bins" #endif @@ -154,20 +155,20 @@ typedef uintptr_t mi_thread_free_t; // - using `uint16_t` does not seem to slow things down typedef struct mi_page_s { // "owned" by the segment - uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` - bool segment_in_use:1; // `true` if the segment allocated this page - bool is_reset:1; // `true` if the page memory was reset - bool is_committed:1; // `true` if the page virtual memory is committed + size_t slice_count; // slices in this page (0 if not a page) + uint16_t slice_offset; // distance from the actual page data slice (0 if a page) + bool is_reset; // `true` if the page memory was reset + bool is_committed; // `true` if the page virtual memory is committed // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed uint16_t reserved; // number of blocks reserved in memory - // 16 bits padding + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #if MI_SECURE uintptr_t cookie; // random cookie to encode the free lists #endif - mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1 + mi_page_flags_t flags; size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) @@ -182,7 +183,7 @@ typedef struct mi_page_s { // improve page index calculation #if (MI_INTPTR_SIZE==8 && MI_SECURE==0) - void* padding[1]; // 12 words on 64-bit + // void* padding[1]; // 12 words on 64-bit #elif MI_INTPTR_SIZE==4 // void* padding[1]; // 12 words on 32-bit #endif @@ -193,30 +194,37 @@ typedef struct mi_page_s { typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64kb pages inside a segment MI_PAGE_MEDIUM, // medium blocks go into 512kb pages inside a segment - MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment - MI_PAGE_HUGE // huge blocks (>512kb) are put into a single page in a segment of the exact size (but still 2mb aligned) + MI_PAGE_LARGE, // larger blocks go into a page of just one block + MI_PAGE_HUGE, // huge blocks (>16mb) are put into a single page in a single segment. 
} mi_page_kind_t; +typedef enum mi_segment_kind_e { + MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. + MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. +} mi_segment_kind_t; + +typedef mi_page_t mi_slice_t; + // Segments are large allocated memory blocks (2mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { struct mi_segment_s* next; struct mi_segment_s* prev; - struct mi_segment_s* abandoned_next; + struct mi_segment_s* abandoned_next; // abandoned segment stack: `used == abandoned` size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t used; // count of pages in use (`used <= capacity`) - size_t capacity; // count of available pages (`#free + used`) + size_t used; // count of pages in use size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` size_t memid; // id for the os-level memory manager + bool all_committed; // layout like this to optimize access in `mi_free` - size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). - volatile uintptr_t thread_id; // unique id of the thread owning this segment - mi_page_kind_t page_kind; // kind of pages: small, large, or huge - mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages + mi_segment_kind_t kind; + uintptr_t thread_id; + size_t slice_count; // slices in this segment (at most MI_SLICES_PER_SEGMENT) + mi_slice_t slices[MI_SLICES_PER_SEGMENT]; } mi_segment_t; @@ -326,13 +334,13 @@ typedef struct mi_stats_s { mi_stat_count_t commit_calls; mi_stat_count_t threads; mi_stat_count_t huge; - mi_stat_count_t giant; + mi_stat_count_t large; mi_stat_count_t malloc; mi_stat_count_t segments_cache; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t huge_count; - mi_stat_counter_t giant_count; + mi_stat_counter_t large_count; #if MI_STAT>1 mi_stat_count_t normal[MI_BIN_HUGE+1]; #endif @@ -367,11 +375,11 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SEGMENT_SIZE) // Segments thread local data typedef struct mi_segments_tld_s { - mi_segment_queue_t small_free; // queue of segments with free small pages - mi_segment_queue_t medium_free; // queue of segments with free medium pages + mi_page_queue_t pages[MI_SEGMENT_BIN_MAX+1]; // free pages inside segments size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 24f6c440..c605d637 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -43,7 +43,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* heap, size_t size, size_t if (p == NULL) return NULL; // .. and align within the allocation - mi_page_set_has_aligned( _mi_ptr_page(p), true ); + mi_page_set_has_aligned(_mi_ptr_page(p), true); uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment); mi_assert_internal(adjust % sizeof(uintptr_t) == 0); void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); diff --git a/src/alloc.c b/src/alloc.c index bbe504a1..b5a48bde 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -226,7 +226,7 @@ void mi_free(void* p) mi_attr_noexcept #endif mi_page_t* const page = _mi_segment_page_of(segment, p); - + #if (MI_STAT>1) mi_heap_t* heap = mi_heap_get_default(); mi_heap_stat_decrease( heap, malloc, mi_usable_size(p)); @@ -235,9 +235,9 @@ void mi_free(void* p) mi_attr_noexcept } // huge page stat is accounted for in `_mi_page_retire` #endif - - const uintptr_t tid = _mi_thread_id(); - if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks + + uintptr_t tid = _mi_thread_id(); + if (mi_likely(page->flags == tid)) { // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; mi_block_set_next(page, block, page->local_free); diff --git a/src/heap.c b/src/heap.c index 768cab96..7b5d7a07 100644 --- a/src/heap.c +++ b/src/heap.c @@ -150,7 +150,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + // _mi_mem_collect(&heap->tld->stats); } } @@ -245,9 +245,9 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); // stats - if (page->block_size > MI_LARGE_SIZE_MAX) { - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + if (page->block_size > MI_MEDIUM_SIZE_MAX) { + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.large,page->block_size); } else { _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); @@ -255,7 +255,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ } #if (MI_STAT>1) size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_LARGE_SIZE_MAX) { + if (page->block_size <= MI_MEDIUM_SIZE_MAX) { mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); } mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... 
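Note on the size classification behind the heap.c hunks above (and the matching page.c hunks below): allocations up to MI_MEDIUM_SIZE_MAX stay in the regular size-class bins, sizes up to MI_LARGE_SIZE_MAX become a one-block "large" page inside a normal segment, and anything beyond that gets its own MI_SEGMENT_HUGE segment. A minimal sketch of that decision follows; the threshold values are illustrative assumptions only (the derived-constant hunk in mimalloc-types.h is partly illegible here, and the 16MiB large limit is taken from the ">16mb" remark in the mi_page_kind_t comment).

// Sketch of the size classification implied by this patch; the thresholds
// passed in below (roughly 128KiB and 16MiB on 64-bit) are assumptions for
// illustration, not the patch's exact MI_MEDIUM_SIZE_MAX / MI_LARGE_SIZE_MAX.
#include <stddef.h>
#include <stdio.h>

#define KiB ((size_t)1024)
#define MiB (1024*KiB)

typedef enum { ALLOC_BINNED, ALLOC_LARGE_PAGE, ALLOC_HUGE_SEGMENT } alloc_path_t;

static alloc_path_t classify(size_t size, size_t medium_max, size_t large_max) {
  if (size <= medium_max) return ALLOC_BINNED;      // small/medium: regular size-class bins
  if (size <= large_max)  return ALLOC_LARGE_PAGE;  // large: one-block page in a normal segment
  return ALLOC_HUGE_SEGMENT;                        // huge: dedicated MI_SEGMENT_HUGE segment
}

int main(void) {
  const size_t sizes[] = { 64, 100*KiB, 1*MiB, 32*MiB };
  for (size_t i = 0; i < sizeof(sizes)/sizeof(sizes[0]); i++) {
    printf("%zu bytes -> path %d\n", sizes[i], (int)classify(sizes[i], 128*KiB, 16*MiB));
  }
  return 0;
}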
diff --git a/src/init.c b/src/init.c index 77ce4aad..ff0fa76c 100644 --- a/src/init.c +++ b/src/init.c @@ -21,7 +21,7 @@ const mi_page_t _mi_page_empty = { NULL, 0, 0, 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && MI_SECURE==0) - , { NULL } + // , { NULL } #endif }; @@ -43,8 +43,8 @@ const mi_page_t _mi_page_empty = { QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ - QNULL(MI_LARGE_WSIZE_MAX + 1 /* 655360, Huge queue */), \ - QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ } + QNULL(MI_MEDIUM_WSIZE_MAX + 1 /* 655360, Huge queue */), \ + QNULL(MI_MEDIUM_WSIZE_MAX + 2) /* Full queue */ } #define MI_STAT_COUNT_NULL() {0,0,0,0} @@ -91,14 +91,23 @@ const mi_heap_t _mi_heap_empty = { mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; +// Empty page queues for every bin +#define MI_SEGMENT_PAGE_QUEUES_EMPTY \ + { QNULL(0), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ + QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ + QNULL( 160), QNULL( 192), QNULL( 224), /* 27 */ } + + #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) static mi_tld_t tld_main = { 0, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { MI_SEGMENT_PAGE_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { diff --git a/src/page-queue.c b/src/page-queue.c index e59620c2..f396e233 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -34,15 +34,15 @@ terms of the MIT license. 
A copy of the license can be found in the file static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_LARGE_SIZE_MAX+sizeof(uintptr_t))); + return (pq->block_size == (MI_MEDIUM_SIZE_MAX+sizeof(uintptr_t))); } static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_LARGE_SIZE_MAX+(2*sizeof(uintptr_t)))); + return (pq->block_size == (MI_MEDIUM_SIZE_MAX+(2*sizeof(uintptr_t)))); } static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { - return (pq->block_size > MI_LARGE_SIZE_MAX); + return (pq->block_size > MI_MEDIUM_SIZE_MAX); } /* ----------------------------------------------------------- @@ -116,7 +116,7 @@ extern inline uint8_t _mi_bin(size_t size) { bin = (uint8_t)wsize; } #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { + else if (wsize > MI_MEDIUM_WSIZE_MAX) { bin = MI_BIN_HUGE; } else { @@ -147,7 +147,7 @@ size_t _mi_bin_size(uint8_t bin) { // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { - if (size <= MI_LARGE_SIZE_MAX) { + if (size <= MI_MEDIUM_SIZE_MAX) { return _mi_bin_size(_mi_bin(size)); } else { @@ -245,7 +245,7 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; @@ -268,7 +268,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ mi_assert_internal(page->heap == NULL); mi_assert_internal(!mi_page_queue_contains(queue, page)); mi_assert_internal(page->block_size == queue->block_size || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); @@ -297,8 +297,8 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || (page->block_size == to->block_size && mi_page_queue_is_full(from)) || (page->block_size == from->block_size && mi_page_queue_is_full(to)) || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_full(to))); + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_full(to))); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; diff --git a/src/page.c b/src/page.c index 9d645b6c..b1fd1e69 100644 --- a/src/page.c +++ b/src/page.c @@ -74,7 +74,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == 
_mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); @@ -102,7 +102,7 @@ bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_MEDIUM_SIZE_MAX || mi_page_is_in_full(page)); mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); } return true; @@ -356,9 +356,9 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_page_set_has_aligned(page, false); // account for huge pages here - if (page->block_size > MI_LARGE_SIZE_MAX) { - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); + if (page->block_size > MI_MEDIUM_SIZE_MAX) { + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_decrease(&page->heap->tld->stats.large, page->block_size); } else { _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); @@ -554,7 +554,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size); + _mi_segment_page_start(segment, page, &page_size); page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); @@ -702,7 +702,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { ----------------------------------------------------------- */ // A huge page is allocated directly without being in a queue -static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { +static mi_page_t* mi_large_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t); mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_queue_t* pq = mi_page_queue(heap,block_size); @@ -711,9 +711,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(page->block_size == block_size); - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.giant, block_size); - _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_increase(&heap->tld->stats.large, block_size); + _mi_stat_counter_increase(&heap->tld->stats.large_count, 1); } else { _mi_stat_increase(&heap->tld->stats.huge, block_size); @@ -744,12 +744,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // huge allocation? 
mi_page_t* page; - if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) { + if (mi_unlikely(size > MI_MEDIUM_SIZE_MAX)) { if (mi_unlikely(size >= (SIZE_MAX - MI_MAX_ALIGN_SIZE))) { page = NULL; } else { - page = mi_huge_page_alloc(heap,size); + page = mi_large_page_alloc(heap,size); } } else { diff --git a/src/segment.c b/src/segment.c index 736345bf..31117857 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,16 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file /* ----------------------------------------------------------- Segment allocation - We allocate pages inside big OS allocated "segments" - (4mb on 64-bit). This is to avoid splitting VMA's on Linux - and reduce fragmentation on other OS's. Each thread - owns its own segments. - - Currently we have: - - small pages (64kb), 64 in one segment - - medium pages (512kb), 8 in one segment - - large pages (4mb), 1 in one segment - - huge blocks > MI_LARGE_SIZE_MAX (512kb) are directly allocated by the OS + In any case the memory for a segment is virtual and only committed on demand (i.e. we are careful to not touch the memory @@ -35,75 +26,103 @@ terms of the MIT license. A copy of the license can be found in the file be reclaimed by still running threads, much like work-stealing. ----------------------------------------------------------- */ - /* ----------------------------------------------------------- - Queue of segments containing free pages + Bins ----------------------------------------------------------- */ - - -#if (MI_DEBUG>1) -static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { - mi_assert_internal(segment != NULL); - mi_segment_t* list = queue->first; - while (list != NULL) { - if (list == segment) break; - mi_assert_internal(list->next==NULL || list->next->prev == list); - mi_assert_internal(list->prev==NULL || list->prev->next == list); - list = list->next; - } - return (list == segment); +// Use bit scan forward to quickly find the first zero bit if it is available +#if defined(_MSC_VER) +#include +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + #if (MI_INTPTR_SIZE==8) + _BitScanReverse64(&idx, x); + #else + _BitScanReverse(&idx, x); + #endif + return idx; } +#elif defined(__GNUC__) || defined(__clang__) +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 
8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); +} +#else +#error "define bsr for your platform" #endif -static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) { - return (queue->first == NULL); +static size_t mi_slice_bin4(size_t slice_count) { + if (slice_count==0) return 0; + mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + size_t s = mi_bsr(slice_count); + if (s <= 1) return slice_count; + size_t bin = ((s << 1) | (slice_count >> (s - 1))&0x01); + return bin; } -static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) { - mi_assert_expensive(mi_segment_queue_contains(queue, segment)); - if (segment->prev != NULL) segment->prev->next = segment->next; - if (segment->next != NULL) segment->next->prev = segment->prev; - if (segment == queue->first) queue->first = segment->next; - if (segment == queue->last) queue->last = segment->prev; - segment->next = NULL; - segment->prev = NULL; +static size_t mi_slice_bin8(size_t slice_count) { + if (slice_count==0) return 0; + mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + size_t s = mi_bsr(slice_count); + if (s <= 2) return slice_count; + size_t bin = ((s << 2) | (slice_count >> (s - 2))&0x03) - 5; + return bin; } -static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) { - mi_assert_expensive(!mi_segment_queue_contains(queue, segment)); - segment->next = NULL; - segment->prev = queue->last; - if (queue->last != NULL) { - mi_assert_internal(queue->last->next == NULL); - queue->last->next = segment; - queue->last = segment; - } - else { - queue->last = queue->first = segment; - } +static size_t mi_slice_bin(size_t slice_count) { + mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); + mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) == MI_SEGMENT_BIN_MAX); + size_t bin = (slice_count==0 ? 0 : mi_slice_bin8(slice_count)); + mi_assert_internal(bin >= 0 && bin <= MI_SEGMENT_BIN_MAX); + return bin; } -static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi_segments_tld_t* tld) { - if (kind == MI_PAGE_SMALL) return &tld->small_free; - else if (kind == MI_PAGE_MEDIUM) return &tld->medium_free; - else return NULL; + +/* ----------------------------------------------------------- + Page Queues +----------------------------------------------------------- */ +static bool mi_page_queue_is_empty(mi_page_queue_t* pq) { + return (pq->first == NULL); } -static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - return mi_segment_free_queue_of_kind(segment->page_kind, tld); +static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) +{ + mi_page_t* page = pq->first; + if (page==NULL) return NULL; + mi_assert_internal(page->prev==NULL); + pq->first = page->next; + if (page->next == NULL) pq->last = NULL; + else page->next->prev = NULL; + page->next = NULL; + page->prev = NULL; // paranoia + page->block_size = 1; // no more free + return page; } -// remove from free queue if it is in one -static void mi_segment_remove_from_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); // may be NULL - bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); - if (in_queue) { - mi_segment_queue_remove(queue, segment); - } +static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { + // todo: or push to the end? 
+ mi_assert_internal(page->prev == NULL && page->next==NULL); + page->prev = NULL; // paranoia + page->next = pq->first; + pq->first = page; + if (page->next != NULL) page->next->prev = page; + else pq->last = page; + page->block_size = 0; // free } -static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_enqueue(mi_segment_free_queue(segment, tld), segment); +static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { + size_t bin = mi_slice_bin(slice_count); + return &tld->pages[bin]; +} + +static void mi_page_queue_remove(mi_page_queue_t* pq, mi_page_t* page) { + mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); + if (page->prev != NULL) page->prev->next = page->next; + else pq->first = page->next; + if (page->next != NULL) page->next->prev = page->prev; + else pq->last = page->prev; + page->prev = NULL; + page->next = NULL; + page->block_size = 1; // no more free } @@ -112,31 +131,47 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t ----------------------------------------------------------- */ #if (MI_DEBUG > 1) -static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); - bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); - if (in_queue) { - mi_assert_expensive(mi_segment_queue_contains(queue, segment)); +static bool mi_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { + for (mi_page_t* p = pq->first; p != NULL; p = p->next) { + if (p==page) return true; } - return in_queue; + return false; } -static size_t mi_segment_pagesize(mi_segment_t* segment) { - return ((size_t)1 << segment->page_shift); -} -static bool mi_segment_is_valid(mi_segment_t* segment) { +static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->abandoned <= segment->used); - size_t nfree = 0; - for (size_t i = 0; i < segment->capacity; i++) { - if (!segment->pages[i].segment_in_use) nfree++; + mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); + //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); + mi_slice_t* slice = &segment->slices[0]; + size_t page_count = 0; + mi_page_queue_t* pq; + while(slice < &segment->slices[segment->slice_count]) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (slice->block_size > 0) { // a page in use, all slices need their back offset set + page_count++; + for (size_t i = 0; i < slice->slice_count; i++) { + mi_assert_internal((slice+i)->slice_offset == i); + mi_assert_internal(i==0 || (slice+i)->slice_count == 0); + mi_assert_internal(i==0 || (slice+i)->block_size == 1); + } + } + else { // free range of slices; only last slice needs a valid back offset + mi_slice_t* end = slice + slice->slice_count - 1; + mi_assert_internal(slice == end - end->slice_offset); + mi_assert_internal(slice == end || end->slice_count == 0 ); + mi_assert_internal(end->block_size == 0); + if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { + pq = mi_page_queue_for(slice->slice_count,tld); + mi_assert_internal(mi_page_queue_contains(pq,mi_slice_to_page(slice))); + 
} + } + slice = slice + slice->slice_count; } - mi_assert_internal(nfree + segment->used == segment->capacity); - mi_assert_internal(segment->thread_id == _mi_thread_id()); // or 0 - mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || - (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + mi_assert_internal(slice == &segment->slices[segment->slice_count]); + mi_assert_internal(page_count == segment->used + 1); return true; } #endif @@ -145,28 +180,32 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { Segment size calculations ----------------------------------------------------------- */ -// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) +// Start of the page available memory; can be used on uninitialized pages +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); - uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; - - if (page->segment_idx == 0) { - // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; - psize -= segment->segment_info_size; - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { - size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - } - mi_assert_internal((uintptr_t)p % block_size == 0); - } + mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); + ptrdiff_t idx = slice - segment->slices; + size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); + /* + if (idx == 0) { + // the first page starts after the segment info (and possible guard page) + p += segment->segment_info_size; + psize -= segment->segment_info_size; + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + // to ensure this, we over-estimate and align with the OS page size + const size_t asize = _mi_os_page_size(); + uint8_t* q = (uint8_t*)_mi_align_up((uintptr_t)p, _mi_os_page_size()); + if (p < q) { + psize -= (q - p); + p = q; + } + mi_assert_internal((uintptr_t)p % _mi_os_page_size() == 0); } + */ + long secure = mi_option_get(mi_option_secure); - if (secure > 1 || (secure == 1 && page->segment_idx == segment->capacity - 1)) { + if (secure > 1 || (secure == 1 && slice == &segment->slices[segment->slice_count - 1])) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page psize -= _mi_os_page_size(); @@ -178,34 +217,23 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { - /* - if (mi_option_is_enabled(mi_option_secure)) { - // always reserve maximally so the protection falls on - // the same address area, as we need to reuse them from the caches interchangably. 
- capacity = MI_SMALL_PAGES_PER_SEGMENT; - } - */ - size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; +static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_size) { + size_t page_size = _mi_os_page_size(); + size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; - size_t isize = 0; - - if (!mi_option_is_enabled(mi_option_secure)) { - // normally no guard pages - isize = _mi_align_up(minsize, (16 > MI_MAX_ALIGN_SIZE ? 16 : MI_MAX_ALIGN_SIZE)); - } - else { + + if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) - size_t page_size = _mi_os_page_size(); - isize = _mi_align_up(minsize, page_size); - guardsize = page_size; - required = _mi_align_up(required, page_size); + guardsize = page_size; + required = _mi_align_up(required, page_size); } ; if (info_size != NULL) *info_size = isize; - if (pre_size != NULL) *pre_size = isize + guardsize; - return (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_PAGE_HUGE_ALIGN) ); + if (pre_size != NULL) *pre_size = isize + guardsize; + size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_SEGMENT_SLICE_SIZE) ); + mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); + return segment_size; } @@ -229,15 +257,15 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se segment->thread_id = 0; mi_segments_track_size(-((long)segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { - _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + _mi_os_free(segment, segment_size, /*segment->memid,*/ tld->stats); } // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -// and no more than 4. -#define MI_SEGMENT_CACHE_MAX (4) +// and no more than 1. 
+#define MI_SEGMENT_CACHE_MAX (1) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -270,14 +298,13 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { } static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); } segment->next = tld->cache; tld->cache = segment; @@ -297,64 +324,119 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } +/* ----------------------------------------------------------- + Slices +----------------------------------------------------------- */ + + +static uint8_t* mi_slice_start(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + return ((uint8_t*)segment + (mi_slice_index(slice)*MI_SEGMENT_SLICE_SIZE)); +} + +static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { + return &segment->slices[segment->slice_count-1]; +} + +/* ----------------------------------------------------------- + Page management +----------------------------------------------------------- */ + + +static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_index >= 0 && slice_index < segment->slice_count); + size_t bin = mi_slice_bin(slice_count); + if (slice_count==0) slice_count = 1; + mi_assert_internal(slice_count >= 0 && slice_index + slice_count - 1 < segment->slice_count); + + // set first and last slice (the intermediates can be undetermined) + mi_slice_t* slice = &segment->slices[slice_index]; + slice->slice_count = slice_count; + slice->slice_offset = 0; + if (slice_count > 1) { + mi_slice_t* end = &segment->slices[slice_index + slice_count - 1]; + end->slice_count = 0; + end->slice_offset = (uint16_t)slice_count - 1; + end->block_size = 0; + } + // and push it on the free page queue + mi_page_queue_push( &tld->pages[bin], mi_slice_to_page(slice) ); +} + +static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); + size_t slice_index = mi_slice_index(mi_page_to_slice(page)); + mi_segment_page_init(segment,slice_index,page->slice_count,tld); +} + + +static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(page->slice_count >= slice_count); + mi_assert_internal(page->block_size > 0); // no more in free queue + if (page->slice_count <= slice_count) return; + mi_segment_t* segment = _mi_page_segment(page); + size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; + size_t next_count = page->slice_count - slice_count; + mi_segment_page_init( segment, next_index, next_count, tld ); + page->slice_count = slice_count; +} + +static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { + // search from best fit up + mi_page_queue_t* pq = 
mi_page_queue_for(slice_count,tld); + if (slice_count == 0) slice_count = 1; + while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX] && mi_page_queue_is_empty(pq)) { + pq++; + } + if (pq > &tld->pages[MI_SEGMENT_BIN_MAX]) { + // could not find a page.. + return NULL; + } + + // pop the page and split to the right size + mi_page_t* page = mi_page_queue_pop(pq); + mi_assert_internal(page != NULL && page->slice_count >= slice_count && page->slice_offset == 0); + if (page->slice_count > slice_count) { + mi_segment_page_split(page, slice_count, tld); + } + mi_assert_internal(page != NULL && page->slice_count == slice_count); + return page; +} + +static void mi_segment_page_remove(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); + mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); + mi_page_queue_remove(pq, mi_slice_to_page(slice)); +} + + /* ----------------------------------------------------------- Segment allocation ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // calculate needed sizes first - size_t capacity; - if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); - capacity = 1; - } - else { - mi_assert_internal(required == 0); - size_t page_size = (size_t)1 << page_shift; - capacity = MI_SEGMENT_SIZE / page_size; - mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); - mi_assert_internal(capacity >= 1 && capacity <= MI_SMALL_PAGES_PER_SEGMENT); - } size_t info_size; size_t pre_size; - size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); + size_t segment_size = mi_segment_size(required, &pre_size, &info_size); + size_t slice_count = segment_size / MI_SEGMENT_SLICE_SIZE; mi_assert_internal(segment_size >= required); - size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); + //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); - bool protection_still_good = false; + bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) + || required > 0; // huge page mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); - if (segment != NULL) { - if (mi_option_is_enabled(mi_option_secure)) { - if (segment->page_kind != page_kind) { - _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs - } - else { - protection_still_good = true; // otherwise, the guard pages are still in place - } - } - if (!mi_option_is_enabled(mi_option_eager_commit)) { - if (page_kind > MI_PAGE_MEDIUM) { - _mi_mem_commit(segment, segment->segment_size, tld->stats); - } - else { - // ok, commit (and unreset) on demand again - } - } - else if (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset)) { - _mi_mem_unreset(segment, segment->segment_size, tld->stats); - } - } - else { + if (segment==NULL) { // Allocate the segment from the OS - size_t memid; - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &memid, os_tld); + size_t memid = 0; + segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, /* &memid,*/ os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { - _mi_mem_commit(segment, info_size, tld->stats); + _mi_os_commit(segment, info_size, tld->stats); } segment->memid = memid; mi_segments_track_size((long)segment_size, tld); @@ -367,65 +449,73 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->memid = memid; } - if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) { + if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data - mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); + mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); + _mi_os_protect((uint8_t*)segment + info_size, (pre_size - info_size)); size_t os_page_size = _mi_os_page_size(); - if (mi_option_get(mi_option_secure) <= 1) { - // and protect the last page too - _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); + // and protect the last page too + _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + slice_count--; // don't use the last slice :-( + } + + // initialize segment info + segment->segment_size = segment_size; + segment->segment_info_size = pre_size; + segment->thread_id = _mi_thread_id(); + segment->cookie = _mi_ptr_cookie(segment); + segment->slice_count = slice_count; + segment->all_committed = commit; + segment->kind = (required == 0 ? 
MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); + _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + + // reserve first slices for segment info + size_t islice_count = (segment->segment_info_size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; + for (size_t i = 0; i < islice_count; i++) { + mi_slice_t* slice = &segment->slices[i]; + if (i==0) { + slice->slice_count = islice_count; + slice->block_size = islice_count * MI_SEGMENT_SLICE_SIZE; } else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); - } + slice->slice_offset = (uint16_t)i; + slice->block_size = 1; } } - segment->page_kind = page_kind; - segment->capacity = capacity; - segment->page_shift = page_shift; - segment->segment_size = segment_size; - segment->segment_info_size = pre_size; - segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); - for (uint8_t i = 0; i < segment->capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; + // initialize initial free pages + if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page + mi_segment_page_init(segment, islice_count, segment->slice_count - islice_count, tld); } - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); - //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); - //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); - mi_assert(segment != NULL); - mi_segment_remove_from_free_queue(segment,tld); + mi_assert_internal(segment != NULL); + mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->prev == NULL); + mi_assert_internal(segment->used == 0); - mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); - mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); - mi_assert(segment->next == NULL); - mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - - // update reset memory statistics - /* - for (uint8_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - page->is_reset = false; - mi_stat_decrease( tld->stats->reset,mi_page_size(page)); + // Remove the free pages + mi_slice_t* slice = &segment->slices[0]; + size_t page_count = 0; + while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. 
+ if (slice->block_size == 0) { + mi_segment_page_remove(slice, tld); } + page_count++; + slice = slice + slice->slice_count; } - */ + mi_assert_internal(page_count == 2); // first page is allocated by the segment itself + // stats + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -436,100 +526,143 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } /* ----------------------------------------------------------- - Free page management inside a segment + Page allocation ----------------------------------------------------------- */ +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(required <= MI_LARGE_SIZE_MAX && page_kind <= MI_PAGE_LARGE); -static bool mi_segment_has_free(const mi_segment_t* segment) { - return (segment->used < segment->capacity); -} + // find a free page + size_t page_size = _mi_align_up(required,MI_SEGMENT_SLICE_SIZE); + size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; + mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + if (page==NULL) { + // no free page, allocate a new segment and try again + if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM + return mi_segment_page_alloc(page_kind, required, tld, os_tld); + } + mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_assert_expensive(mi_segment_is_valid(segment)); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - if (page->is_reset || !page->is_committed) { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - mi_assert_internal(!(page->is_reset && !page->is_committed)); - if (!page->is_committed) { - page->is_committed = true; - _mi_mem_commit(start,psize,stats); - } - if (page->is_reset) { - page->is_reset = false; - _mi_mem_unreset(start, psize, stats); - } - } - return page; + // set slice back pointers and commit/unreset + mi_segment_t* segment = _mi_page_segment(page); + mi_slice_t* slice = mi_page_to_slice(page); + bool commit = false; + bool unreset = false; + for (size_t i = 0; i < page->slice_count; i++, slice++) { + slice->slice_offset = (uint16_t)i; + slice->block_size = 1; + if (i > 0) slice->slice_count = 0; + if (!segment->all_committed && !slice->is_committed) { + slice->is_committed = true; + commit = true; + } + if (slice->is_reset) { + slice->is_reset = false; + unreset = true; } } - mi_assert(false); - return NULL; + uint8_t* page_start = mi_slice_start(mi_page_to_slice(page)); + if(commit) { _mi_os_commit(page_start, page_size, tld->stats); } + if(unreset){ _mi_os_unreset(page_start, page_size, tld->stats); } + + // initialize the page and return + mi_assert_internal(segment->thread_id == _mi_thread_id()); + segment->used++; + mi_page_init_flags(page, segment->thread_id); + return page; +} + +static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page != NULL && page->slice_count > 0 && page->slice_offset == 0 && page->block_size > 0); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(segment->used > 0); + 
segment->used--; + + // free and coalesce the page + mi_slice_t* slice = mi_page_to_slice(page); + size_t slice_count = slice->slice_count; + mi_slice_t* next = slice + slice->slice_count; + mi_assert_internal(next <= mi_segment_last_slice(segment) + 1); + if (next <= mi_segment_last_slice(segment) && next->block_size==0) { + // free next block -- remove it from free and merge + mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); + slice_count += next->slice_count; // extend + mi_segment_page_remove(next, tld); + } + if (slice > segment->slices) { + mi_slice_t* prev = slice - 1; + prev = prev - prev->slice_offset; + mi_assert_internal(prev >= segment->slices); + if (prev->block_size==0) { + // free previous slice -- remove it from free and merge + mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); + slice_count += prev->slice_count; + mi_segment_page_remove(prev, tld); + slice = prev; + } + } + + // and add the new free page + mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); } /* ----------------------------------------------------------- - Free + Page Free ----------------------------------------------------------- */ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); - mi_assert_internal(page->segment_in_use); +static void mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page->block_size > 0); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(page->is_committed); + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment->all_committed || page->is_committed); size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? 
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_mem_reset(start, psize, stats); + _mi_os_reset(start, psize, tld->stats); } // zero the page data - uint8_t idx = page->segment_idx; // don't clear the index - bool is_reset = page->is_reset; // don't clear the reset flag + size_t slice_count = page->slice_count; // don't clear the slice_count + bool is_reset = page->is_reset; // don't clear the reset flag bool is_committed = page->is_committed; // don't clear the commit flag memset(page, 0, sizeof(*page)); - page->segment_idx = idx; - page->segment_in_use = false; + page->slice_count = slice_count; page->is_reset = is_reset; page->is_committed = is_committed; - segment->used--; + page->block_size = 1; + + // and free it + mi_segment_page_free_coalesce(page, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); // mark it as free now - mi_segment_page_clear(segment, page, tld->stats); + mi_segment_page_clear(page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment mi_segment_free(segment, force, tld); } - else { - if (segment->used == segment->abandoned) { - // only abandoned pages; remove from free list and abandon - mi_segment_abandon(segment,tld); - } - else if (segment->used + 1 == segment->capacity) { - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // for now we only support small and medium pages - // move back to segments free list - mi_segment_insert_in_free_queue(segment,tld); - } - } + else if (segment->used == segment->abandoned) { + // only abandoned pages; remove from free list and abandon + mi_segment_abandon(segment,tld); + } } @@ -548,10 +681,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); - // remove the segment from the free page queue if needed - mi_segment_remove_from_free_queue(segment,tld); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + // all pages in the segment are abandoned; add it to the abandoned list segment->thread_id = 0; do { @@ -565,7 +696,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); @@ -583,7 +714,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } else { atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) - if (atmost < 8) atmost = 8; // but at least 8 + if (atmost < 2) atmost = 2; // but at least 2 } // for `atmost` `reclaimed` abandoned segments... @@ -597,42 +728,44 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen // got it. 
mi_atomic_decrement(&abandoned_count); - segment->thread_id = _mi_thread_id(); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned_next = NULL; mi_segments_track_size((long)segment->segment_size,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); _mi_stat_decrease(&tld->stats->segments_abandoned,1); - // add its abandoned pages to the current thread - mi_assert(segment->abandoned == segment->used); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->segment_in_use) { + mi_slice_t* slice = &segment->slices[0]; + while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + mi_page_t* page = mi_slice_to_page(slice); + slice = slice + slice->slice_count; + if (page->block_size > 0) { // a page in use segment->abandoned--; - mi_assert(page->next == NULL); + mi_assert_internal(page->next == NULL && page->prev==NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(segment,page,tld->stats); + mi_segment_page_clear(page, tld); } else { // otherwise reclaim it - mi_page_init_flags(page,segment->thread_id); - _mi_page_reclaim(heap,page); + mi_page_init_flags(page, segment->thread_id); + _mi_page_reclaim(heap, page); } } + else { // free range of slices; add to the free pages + mi_segment_page_add_free(page,tld); + } } + mi_assert(segment->abandoned == 0); + segment->thread_id = _mi_thread_id(); // only now for valid checks if (segment->used == 0) { // due to page_clear mi_segment_free(segment,false,tld); } else { - reclaimed++; - // add its free pages to the the current thread free small segment queue - if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { - mi_segment_insert_in_free_queue(segment,tld); - } + reclaimed++; } } return (reclaimed>0); @@ -643,64 +776,16 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen Small page allocation ----------------------------------------------------------- */ -// Allocate a small page inside a segment. 
-// Requires that the page has free pages -static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld->stats); - page->segment_in_use = true; - mi_page_init_flags(page,segment->thread_id); - segment->used++; - mi_assert_internal(segment->used <= segment->capacity); - if (segment->used == segment->capacity) { - // if no more free pages, remove from the queue - mi_assert_internal(!mi_segment_has_free(segment)); - mi_segment_remove_from_free_queue(segment,tld); - } - return page; -} - -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld); - if (mi_segment_queue_is_empty(free_queue)) { - mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld); - if (segment == NULL) return NULL; - mi_segment_enqueue(free_queue, segment); - } - mi_assert_internal(free_queue->first != NULL); - return mi_segment_page_alloc_in(free_queue->first,tld); -} - -static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); -} - -static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); -} - -/* ----------------------------------------------------------- - large page allocation ------------------------------------------------------------ */ - -static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; - segment->used = 1; - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; - mi_page_init_flags(page,segment->thread_id); - return page; -} - static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); + mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld); if (segment == NULL) return NULL; mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); segment->used = 1; - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_slice_to_page(&segment->slices[0]); + page->slice_count = segment->slice_count; + page->slice_offset = 0; + page->block_size = size; mi_page_init_flags(page,segment->thread_id); return page; } @@ -708,25 +793,144 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld /* ----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ +/* static bool mi_is_good_fit(size_t bsize, size_t size) { // good fit if no more than 25% wasted return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4)); } +*/ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { - page = mi_segment_small_page_alloc(tld,os_tld); + if (block_size <= MI_SMALL_SIZE_MAX) {// || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { + page = 
mi_segment_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld); } - else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { - page = mi_segment_medium_page_alloc(tld, os_tld); + else if (block_size <= MI_MEDIUM_SIZE_MAX) {// || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { + page = mi_segment_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld); } - else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) { - page = mi_segment_large_page_alloc(tld, os_tld); + else if (block_size <= MI_LARGE_SIZE_MAX) { + page = mi_segment_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld); } else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); return page; } + + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (128mb) + set to 1 if it contains the segment meta data. +----------------------------------------------------------- */ + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)1 << 44) // 16TB +#else +#define MI_MAX_ADDRESS ((size_t)1 << 31) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 1KiB per TB with 128MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB? + uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; + *bitidx = segindex % (8*MI_INTPTR_SIZE); + return (segindex / (8*MI_INTPTR_SIZE)); +} + +static void mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + if (index==0) return; + uintptr_t mask; + uintptr_t newmask; + do { + mask = mi_segment_map[index]; + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_compare_exchange(&mi_segment_map[index], newmask, mask)); +} + +static void mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + if (index == 0) return; + uintptr_t mask; + uintptr_t newmask; + do { + mask = mi_segment_map[index]; + newmask = (mask & ~((uintptr_t)1 << bitidx)); + } while (!mi_atomic_compare_exchange(&mi_segment_map[index], newmask, mask)); +} + +// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
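+// (Descriptive note on the lookup below, derived from the code that follows:
+//  mi_segment_map_index_of computes the map word and bit position of the segment-aligned
+//  address; if that bit is set the pointer lies in a segment we allocated. For interior
+//  pointers into large/huge spans the aligned bit may be clear, so the code scans downward
+//  to the nearest set bit and then validates the candidate segment by its cookie and size.)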
+static mi_segment_t* _mi_segment_of(const void* p) {
+  mi_segment_t* segment = _mi_ptr_segment(p);
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  // fast path: for any pointer to valid small/medium/large object or first 4MiB in huge
+  if (mi_likely((mi_segment_map[index] & ((uintptr_t)1 << bitidx)) != 0)) {
+    return segment; // yes, allocated by us
+  }
+  if (index==0) return NULL;
+  // search downwards for the first segment in case it is an interior pointer
+  // could be slow but searches in 256MiB steps through valid huge objects
+  // note: we could maintain a lowest index to speed up the path for invalid pointers?
+  size_t lobitidx;
+  size_t loindex;
+  uintptr_t lobits = mi_segment_map[index] & (((uintptr_t)1 << bitidx) - 1);
+  if (lobits != 0) {
+    loindex = index;
+    lobitidx = _mi_bsr(lobits);
+  }
+  else {
+    loindex = index - 1;
+    while (loindex > 0 && mi_segment_map[loindex] == 0) loindex--;
+    if (loindex==0) return NULL;
+    lobitidx = _mi_bsr(mi_segment_map[loindex]);
+  }
+  // take difference as the addresses could be larger than the MAX_ADDRESS space.
+  size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
+  segment = (mi_segment_t*)((uint8_t*)segment - diff);
+
+  if (segment == NULL) return NULL;
+  mi_assert_internal((void*)segment < p);
+  bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
+  mi_assert_internal(cookie_ok);
+  if (mi_unlikely(!cookie_ok)) return NULL;
+  if (((uint8_t*)segment + segment->segment_size) <= (uint8_t*)p) return NULL; // outside the range
+  mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + segment->segment_size);
+  return segment;
+}
+
+// Is this a valid pointer in our heap?
+static bool mi_is_valid_pointer(const void* p) {
+  return (_mi_segment_of(p) != NULL);
+}
+
+// Return the full segment range belonging to a pointer
+static void* mi_segment_range_of(const void* p, size_t* size) {
+  mi_segment_t* segment = _mi_segment_of(p);
+  if (segment == NULL) {
+    if (size != NULL) *size = 0;
+    return NULL;
+  }
+  else {
+    if (size != NULL) *size = segment->segment_size;
+    return segment;
+  }
+}
+
+bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+  return mi_is_valid_pointer(p);
+}
+
diff --git a/src/stats.c b/src/stats.c
index e7d398b2..a9a022fb 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -106,11 +106,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
   mi_stat_add(&stats->malloc, &src->malloc, 1);
   mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
   mi_stat_add(&stats->huge, &src->huge, 1);
-  mi_stat_add(&stats->giant, &src->giant, 1);
+  mi_stat_add(&stats->large, &src->large, 1);
   mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
   mi_stat_counter_add(&stats->searches, &src->searches, 1);
   mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
-  mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1);
+  mi_stat_counter_add(&stats->large_count, &src->large_count, 1);
 #if MI_STAT>1
   for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
     if (src->normal[i].allocated > 0 || src->normal[i].freed > 0) {
@@ -232,11 +232,11 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
   mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out);
   mi_stat_print(&normal, "normal", 1, out);
   mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ?
1 : -(stats->huge.allocated / stats->huge_count.count)), out); - mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out); + mi_stat_print(&stats->large, "giant", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &normal, 1); mi_stat_add(&total, &stats->huge, 1); - mi_stat_add(&total, &stats->giant, 1); + mi_stat_add(&total, &stats->large, 1); mi_stat_print(&total, "total", 1, out); _mi_fprintf(out, "malloc requested: "); mi_print_amount(stats->malloc.allocated, 1, out); diff --git a/test/main-override-static.c b/test/main-override-static.c index 6ddf4f37..7f20268a 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -6,8 +6,168 @@ #include #include // redefines malloc etc. +#include +#include + +#define MI_INTPTR_SIZE 8 +#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) + +#define MI_BIN_HUGE 100 +//#define MI_ALIGN2W + +// Bit scan reverse: return the index of the highest bit. +static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; + #if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); + #elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); + #else + # error "define bsr for non-32 or 64-bit platforms" + #endif +} + +static inline size_t _mi_wsize_from_size(size_t size) { + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +extern inline uint8_t _mi_bin8(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + #if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } + #endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + #if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes + #endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
+ // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + +extern inline uint8_t _mi_bin4(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + #if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } + #endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + } + return bin; +} + +size_t _mi_binx4(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 1) return bsize; + size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); + return bin; +} + +size_t _mi_binx8(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 2) return bsize; + size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; + return bin; +} + +void mi_bins() { + //printf(" QNULL(1), /* 0 */ \\\n "); + size_t last_bin = 1; + for (size_t bsize = 0; bsize < 8*1024; bsize++) { + size_t size = bsize * 64 * 1024; + size_t bin = _mi_binx8(bsize); + if (bin != last_bin) { + printf("bsize: %6zd, size: %6zd, bin: %6zd\n", bsize, size, bin); + //printf("QNULL(%6zd), ", wsize); + //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + last_bin = bin; + } + } +} + int main() { mi_version(); + mi_bins(); void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -25,7 +185,7 @@ int main() { //p1 = mi_malloc(32); //free(p1); //p2 = malloc(32); - //mi_free(p2); + //mi_free(p2); mi_stats_print(NULL); return 0; } From f2bafbc57f0604c74bf47fbd105d16a7bb951bcc Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 11:49:56 -0700 Subject: [PATCH 002/352] wip: new segment allocation --- CMakeLists.txt | 1 - include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 2 +- src/heap.c | 2 +- src/init.c | 13 +-- src/page.c | 2 +- src/segment.c | 156 ++++++++++++++++++++++-------------- src/static.c | 2 +- src/stats.c | 2 +- test/main-override-static.c | 13 ++- 10 files changed, 116 insertions(+), 79 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 39a671a0..a5be39b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,6 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c - src/memory.c src/segment.c src/page.c src/alloc.c diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f17d8af0..e8fa1ba1 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -271,7 +271,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; mi_assert_internal(idx < segment->slice_count); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 7e14daca..f4042e60 100644 --- 
a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -375,7 +375,7 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; -#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SEGMENT_SIZE) +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) // Segments thread local data typedef struct mi_segments_tld_s { diff --git a/src/heap.c b/src/heap.c index 7b5d7a07..69084731 100644 --- a/src/heap.c +++ b/src/heap.c @@ -255,7 +255,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ } #if (MI_STAT>1) size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_MEDIUM_SIZE_MAX) { + if (page->block_size <= MI_LARGE_SIZE_MAX) { mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); } mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... diff --git a/src/init.c b/src/init.c index ff0fa76c..d48c1a30 100644 --- a/src/init.c +++ b/src/init.c @@ -92,13 +92,14 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; // Empty page queues for every bin +#define SQNULL(sz) { NULL, NULL, sz } #define MI_SEGMENT_PAGE_QUEUES_EMPTY \ - { QNULL(0), \ - QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ - QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ - QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ - QNULL( 160), QNULL( 192), QNULL( 224), /* 27 */ } - + { SQNULL(1), \ + SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ + SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ + SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ + SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ + SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) diff --git a/src/page.c b/src/page.c index b1fd1e69..17dd98fb 100644 --- a/src/page.c +++ b/src/page.c @@ -99,7 +99,7 @@ bool _mi_page_is_valid(mi_page_t* page) { #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == page->heap->thread_id); mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_MEDIUM_SIZE_MAX || mi_page_is_in_full(page)); diff --git a/src/segment.c b/src/segment.c index 31117857..b70dc664 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,6 +13,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) +static void mi_segment_map_allocated_at(const mi_segment_t* segment); +static void mi_segment_map_freed_at(const mi_segment_t* segment); + /* ----------------------------------------------------------- Segment allocation @@ -50,21 +53,13 @@ static inline size_t mi_bsr(uintptr_t x) { #error "define bsr for your platform" #endif -static size_t mi_slice_bin4(size_t slice_count) { - if (slice_count==0) return 0; - mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); - size_t s = mi_bsr(slice_count); - if (s <= 1) return slice_count; - size_t bin = ((s << 1) | (slice_count >> (s - 1))&0x01); - return bin; -} - static size_t mi_slice_bin8(size_t slice_count) { - if (slice_count==0) return 0; + if (slice_count<=1) return slice_count; mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + slice_count--; size_t s = mi_bsr(slice_count); - if (s <= 2) return slice_count; - size_t bin = ((s << 2) | (slice_count >> (s - 2))&0x03) - 5; + if (s <= 2) return slice_count + 1; + size_t bin = ((s << 2) | ((slice_count >> (s - 2))&0x03)) - 4; return bin; } @@ -72,7 +67,7 @@ static size_t mi_slice_bin(size_t slice_count) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) == MI_SEGMENT_BIN_MAX); size_t bin = (slice_count==0 ? 0 : mi_slice_bin8(slice_count)); - mi_assert_internal(bin >= 0 && bin <= MI_SEGMENT_BIN_MAX); + mi_assert_internal(bin <= MI_SEGMENT_BIN_MAX); return bin; } @@ -80,6 +75,7 @@ static size_t mi_slice_bin(size_t slice_count) { /* ----------------------------------------------------------- Page Queues ----------------------------------------------------------- */ +/* static bool mi_page_queue_is_empty(mi_page_queue_t* pq) { return (pq->first == NULL); } @@ -97,6 +93,7 @@ static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) page->block_size = 1; // no more free return page; } +*/ static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { // todo: or push to the end? 
@@ -111,15 +108,18 @@ static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { size_t bin = mi_slice_bin(slice_count); - return &tld->pages[bin]; + mi_page_queue_t* pq = &tld->pages[bin]; + // mi_assert_internal(pq->block_size >= slice_count); + return pq; } -static void mi_page_queue_remove(mi_page_queue_t* pq, mi_page_t* page) { +static void mi_page_queue_delete(mi_page_queue_t* pq, mi_page_t* page) { mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); + // should work too if the queue does not contain page (which can happen during reclaim) if (page->prev != NULL) page->prev->next = page->next; - else pq->first = page->next; + if (page == pq->first) pq->first = page->next; if (page->next != NULL) page->next->prev = page->prev; - else pq->last = page->prev; + if (page == pq->last) pq->last = page->prev; page->prev = NULL; page->next = NULL; page->block_size = 1; // no more free @@ -145,13 +145,13 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; - size_t page_count = 0; + size_t used_count = 0; mi_page_queue_t* pq; while(slice < &segment->slices[segment->slice_count]) { mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(slice->slice_offset == 0); if (slice->block_size > 0) { // a page in use, all slices need their back offset set - page_count++; + used_count++; for (size_t i = 0; i < slice->slice_count; i++) { mi_assert_internal((slice+i)->slice_offset == i); mi_assert_internal(i==0 || (slice+i)->slice_count == 0); @@ -171,7 +171,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } mi_assert_internal(slice == &segment->slices[segment->slice_count]); - mi_assert_internal(page_count == segment->used + 1); + mi_assert_internal(used_count == segment->used + 1); return true; } #endif @@ -255,6 +255,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { segment->thread_id = 0; + mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set @@ -265,7 +266,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, // and no more than 1. 
-#define MI_SEGMENT_CACHE_MAX (1) +#define MI_SEGMENT_CACHE_MAX (2) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -344,10 +345,10 @@ static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(slice_index >= 0 && slice_index < segment->slice_count); - size_t bin = mi_slice_bin(slice_count); + mi_assert_internal(slice_index < segment->slice_count); + mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); if (slice_count==0) slice_count = 1; - mi_assert_internal(slice_count >= 0 && slice_index + slice_count - 1 < segment->slice_count); + mi_assert_internal(slice_index + slice_count - 1 < segment->slice_count); // set first and last slice (the intermediates can be undetermined) mi_slice_t* slice = &segment->slices[slice_index]; @@ -360,7 +361,7 @@ static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size end->block_size = 0; } // and push it on the free page queue - mi_page_queue_push( &tld->pages[bin], mi_slice_to_page(slice) ); + mi_page_queue_push( pq, mi_slice_to_page(slice) ); } static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { @@ -368,6 +369,7 @@ static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); size_t slice_index = mi_slice_index(mi_page_to_slice(page)); mi_segment_page_init(segment,slice_index,page->slice_count,tld); + } @@ -386,28 +388,28 @@ static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tl // search from best fit up mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); if (slice_count == 0) slice_count = 1; - while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX] && mi_page_queue_is_empty(pq)) { + while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX]) { + for( mi_page_t* page = pq->first; page != NULL; page = page->next) { + if (page->slice_count >= slice_count) { + // found one + mi_page_queue_delete(pq,page); + if (page->slice_count > slice_count) { + mi_segment_page_split(page,slice_count,tld); + } + mi_assert_internal(page != NULL && page->slice_count == slice_count); + return page; + } + } pq++; } - if (pq > &tld->pages[MI_SEGMENT_BIN_MAX]) { - // could not find a page.. - return NULL; - } - - // pop the page and split to the right size - mi_page_t* page = mi_page_queue_pop(pq); - mi_assert_internal(page != NULL && page->slice_count >= slice_count && page->slice_offset == 0); - if (page->slice_count > slice_count) { - mi_segment_page_split(page, slice_count, tld); - } - mi_assert_internal(page != NULL && page->slice_count == slice_count); - return page; + // could not find a page.. 
+ return NULL; } -static void mi_segment_page_remove(mi_slice_t* slice, mi_segments_tld_t* tld) { +static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); - mi_page_queue_remove(pq, mi_slice_to_page(slice)); + mi_page_queue_delete(pq, mi_slice_to_page(slice)); } @@ -440,6 +442,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m } segment->memid = memid; mi_segments_track_size((long)segment_size, tld); + mi_segment_map_allocated_at(segment); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); @@ -506,7 +509,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. if (slice->block_size == 0) { - mi_segment_page_remove(slice, tld); + mi_segment_page_delete(slice, tld); } page_count++; slice = slice + slice->slice_count; @@ -573,7 +576,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require return page; } -static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { +static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page != NULL && page->slice_count > 0 && page->slice_offset == 0 && page->block_size > 0); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->used > 0); @@ -588,7 +591,7 @@ static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tl // free next block -- remove it from free and merge mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); slice_count += next->slice_count; // extend - mi_segment_page_remove(next, tld); + mi_segment_page_delete(next, tld); } if (slice > segment->slices) { mi_slice_t* prev = slice - 1; @@ -598,7 +601,7 @@ static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tl // free previous slice -- remove it from free and merge mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); slice_count += prev->slice_count; - mi_segment_page_remove(prev, tld); + mi_segment_page_delete(prev, tld); slice = prev; } } @@ -606,6 +609,7 @@ static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tl // and add the new free page mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); mi_assert_expensive(mi_segment_is_valid(segment,tld)); + return slice; } @@ -615,7 +619,7 @@ static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tl static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { +static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->block_size > 0); mi_assert_internal(mi_page_all_free(page)); mi_segment_t* segment = _mi_ptr_segment(page); @@ -643,7 +647,7 @@ static void mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { page->block_size = 1; // and free it - mi_segment_page_free_coalesce(page, tld); + return mi_segment_page_free_coalesce(page, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -682,8 +686,20 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) 
{ mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - - // all pages in the segment are abandoned; add it to the abandoned list + + // remove the free pages from our lists + mi_slice_t* slice = &segment->slices[0]; + while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (slice->block_size == 0) { // a free page + mi_segment_page_delete(slice,tld); + slice->block_size = 0; // but keep it free + } + slice = slice + slice->slice_count; + } + + // add it to the abandoned list segment->thread_id = 0; do { segment->abandoned_next = (mi_segment_t*)abandoned; @@ -730,37 +746,50 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_atomic_decrement(&abandoned_count); mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned_next = NULL; + segment->thread_id = _mi_thread_id(); mi_segments_track_size((long)segment->segment_size,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); _mi_stat_decrease(&tld->stats->segments_abandoned,1); mi_slice_t* slice = &segment->slices[0]; - while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page + slice = slice + slice->slice_count; // skip the first segment allocated page + while (slice <= mi_segment_last_slice(segment)) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); mi_page_t* page = mi_slice_to_page(slice); + if (page->block_size == 0) { // a free page, add it to our lists + mi_segment_page_add_free(page,tld); + } slice = slice + slice->slice_count; - if (page->block_size > 0) { // a page in use - segment->abandoned--; + } + + slice = &segment->slices[0]; + mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page + slice = slice + slice->slice_count; // skip the first segment allocated page + while (slice <= mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + mi_page_t* page = mi_slice_to_page(slice); + if (page->block_size > 0) { // a used page mi_assert_internal(page->next == NULL && page->prev==NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + segment->abandoned--; if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(page, tld); + slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing } else { // otherwise reclaim it mi_page_init_flags(page, segment->thread_id); _mi_page_reclaim(heap, page); } - } - else { // free range of slices; add to the free pages - mi_segment_page_add_free(page,tld); - } + } + mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); + slice = slice + slice->slice_count; } mi_assert(segment->abandoned == 0); - segment->thread_id = _mi_thread_id(); // only now for valid checks if (segment->used == 0) { // due to page_clear mi_segment_free(segment,false,tld); } @@ -917,6 +946,11 @@ static bool mi_is_valid_pointer(const void* p) { return (_mi_segment_of(p) != NULL); } +bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* // Return the full segment range belonging to a pointer static void* mi_segment_range_of(const void* p, size_t* size) { mi_segment_t* segment = _mi_segment_of(p); @@ -929,8 +963,6 @@ static 
void* mi_segment_range_of(const void* p, size_t* size) { return segment; } } +*/ -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return mi_is_valid_pointer(p); -} diff --git a/src/static.c b/src/static.c index f1656fa9..df906e04 100644 --- a/src/static.c +++ b/src/static.c @@ -15,7 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file // functions (on Unix's). #include "stats.c" #include "os.c" -#include "memory.c" +//#include "memory.c" #include "segment.c" #include "page.c" #include "heap.c" diff --git a/src/stats.c b/src/stats.c index a9a022fb..aa0c393b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -231,8 +231,8 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n mi_stat_count_t normal = { 0,0,0,0 }; mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out); mi_stat_print(&normal, "normal", 1, out); + mi_stat_print(&stats->large, "large", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out); mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out); - mi_stat_print(&stats->large, "giant", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &normal, 1); mi_stat_add(&total, &stats->huge, 1); diff --git a/test/main-override-static.c b/test/main-override-static.c index 7f20268a..fce319fb 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -143,7 +143,7 @@ size_t _mi_binx4(size_t bsize) { } size_t _mi_binx8(size_t bsize) { - if (bsize==0) return 0; + if (bsize<=1) return bsize; uint8_t b = mi_bsr32((uint32_t)bsize); if (b <= 2) return bsize; size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; @@ -152,16 +152,20 @@ size_t _mi_binx8(size_t bsize) { void mi_bins() { //printf(" QNULL(1), /* 0 */ \\\n "); - size_t last_bin = 1; - for (size_t bsize = 0; bsize < 8*1024; bsize++) { + size_t last_bin = 0; + size_t min_bsize = 0; + size_t last_bsize = 0; + for (size_t bsize = 1; bsize < 2*1024; bsize++) { size_t size = bsize * 64 * 1024; size_t bin = _mi_binx8(bsize); if (bin != last_bin) { - printf("bsize: %6zd, size: %6zd, bin: %6zd\n", bsize, size, bin); + printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); //printf("QNULL(%6zd), ", wsize); //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); last_bin = bin; + min_bsize = bsize; } + last_bsize = bsize; } } @@ -186,6 +190,7 @@ int main() { //free(p1); //p2 = malloc(32); //mi_free(p2); + mi_collect(true); mi_stats_print(NULL); return 0; } From 6ee248b012a56becf6a52b60a2a461f75c7cc7dd Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 14:40:15 -0700 Subject: [PATCH 003/352] wip: fixing bugs in new segment allocation --- include/mimalloc-internal.h | 7 +------ src/segment.c | 35 ++++++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e8fa1ba1..bb60458f 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -254,12 +254,7 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { return (mi_slice_t*)(p); } -static size_t mi_slice_index(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); - return 
index; -} + // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { diff --git a/src/segment.c b/src/segment.c index b70dc664..5b08154b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -16,6 +16,14 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_map_allocated_at(const mi_segment_t* segment); static void mi_segment_map_freed_at(const mi_segment_t* segment); +static size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + return index; +} + + /* ----------------------------------------------------------- Segment allocation @@ -346,7 +354,7 @@ static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_count); - mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); + mi_page_queue_t* pq = (slice_count > MI_SLICES_PER_SEGMENT ? NULL : mi_page_queue_for(slice_count,tld)); if (slice_count==0) slice_count = 1; mi_assert_internal(slice_index + slice_count - 1 < segment->slice_count); @@ -360,8 +368,9 @@ static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size end->slice_offset = (uint16_t)slice_count - 1; end->block_size = 0; } - // and push it on the free page queue - mi_page_queue_push( pq, mi_slice_to_page(slice) ); + // and push it on the free page queue (if it was not a huge page) + if (pq != NULL) mi_page_queue_push( pq, mi_slice_to_page(slice) ); + else slice->block_size = 0; // mark huge page as free anyways } static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { @@ -408,6 +417,7 @@ static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tl static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); + if (slice->slice_count > MI_SLICES_PER_SEGMENT) return; // huge page mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); mi_page_queue_delete(pq, mi_slice_to_page(slice)); } @@ -508,7 +518,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. - if (slice->block_size == 0) { + if (slice->block_size == 0 && segment->kind != MI_SEGMENT_HUGE) { mi_segment_page_delete(slice, tld); } page_count++; @@ -581,7 +591,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->used > 0); segment->used--; - + // free and coalesce the page mi_slice_t* slice = mi_page_to_slice(page); size_t slice_count = slice->slice_count; @@ -627,7 +637,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - + // reset the page memory to reduce memory pressure? 
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; @@ -812,9 +822,20 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); segment->used = 1; mi_page_t* page = mi_slice_to_page(&segment->slices[0]); - page->slice_count = segment->slice_count; + mi_assert_internal(page->block_size > 0 && page->slice_count > 0); + size_t initial_count = page->slice_count; + page = page + initial_count; + page->slice_count = segment->slice_count - initial_count; page->slice_offset = 0; page->block_size = size; + mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); + // set back pointers + for (size_t i = 1; i < page->slice_count; i++) { + mi_slice_t* slice = (mi_slice_t*)(page + i); + slice->slice_offset = (uint16_t)i; + slice->block_size = 1; + slice->slice_count = 0; + } mi_page_init_flags(page,segment->thread_id); return page; } From f2ba95bc64e3e2a4f1d2054cf15eec66cc3b0db4 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 22:00:42 -0700 Subject: [PATCH 004/352] first working version of new segment allocation --- include/mimalloc-internal.h | 7 +++- src/page.c | 2 ++ src/segment.c | 71 ++++++++++++++++++++++--------------- 3 files changed, 51 insertions(+), 29 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bb60458f..e8fa1ba1 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -254,7 +254,12 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { return (mi_slice_t*)(p); } - +static size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + return index; +} // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { diff --git a/src/page.c b/src/page.c index 17dd98fb..bb205426 100644 --- a/src/page.c +++ b/src/page.c @@ -556,6 +556,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi size_t page_size; _mi_segment_page_start(segment, page, &page_size); page->block_size = block_size; + mi_assert_internal(page->block_size <= page_size); + mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #if MI_SECURE diff --git a/src/segment.c b/src/segment.c index 5b08154b..e6eb0b08 100644 --- a/src/segment.c +++ b/src/segment.c @@ -16,12 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_map_allocated_at(const mi_segment_t* segment); static void mi_segment_map_freed_at(const mi_segment_t* segment); -static size_t mi_slice_index(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); - return index; -} + /* ----------------------------------------------------------- @@ -158,16 +153,18 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { while(slice < &segment->slices[segment->slice_count]) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); + size_t index = mi_slice_index(slice); + size_t maxindex = (index + slice->slice_count >= segment->slice_count ? 
segment->slice_count : index + slice->slice_count) - 1; if (slice->block_size > 0) { // a page in use, all slices need their back offset set used_count++; - for (size_t i = 0; i < slice->slice_count; i++) { - mi_assert_internal((slice+i)->slice_offset == i); - mi_assert_internal(i==0 || (slice+i)->slice_count == 0); - mi_assert_internal(i==0 || (slice+i)->block_size == 1); + for (size_t i = index; i <= maxindex; i++) { + mi_assert_internal(segment->slices[i].slice_offset == i - index); + mi_assert_internal(i==index || segment->slices[i].slice_count == 0); + mi_assert_internal(i==index || segment->slices[i].block_size == 1); } } else { // free range of slices; only last slice needs a valid back offset - mi_slice_t* end = slice + slice->slice_count - 1; + mi_slice_t* end = &segment->slices[maxindex]; mi_assert_internal(slice == end - end->slice_offset); mi_assert_internal(slice == end || end->slice_count == 0 ); mi_assert_internal(end->block_size == 0); @@ -176,7 +173,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_page_queue_contains(pq,mi_slice_to_page(slice))); } } - slice = slice + slice->slice_count; + slice = &segment->slices[maxindex+1]; } mi_assert_internal(slice == &segment->slices[segment->slice_count]); mi_assert_internal(used_count == segment->used + 1); @@ -239,7 +236,8 @@ static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_si ; if (info_size != NULL) *info_size = isize; if (pre_size != NULL) *pre_size = isize + guardsize; - size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_SEGMENT_SLICE_SIZE) ); + isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); + size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); return segment_size; } @@ -261,14 +259,14 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { } -static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { +static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; mi_segment_map_freed_at(segment); - mi_segments_track_size(-((long)segment_size),tld); + mi_segments_track_size(-((long)segment->segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_os_free(segment, segment_size, /*segment->memid,*/ tld->stats); + _mi_os_free(segment, segment->segment_size, /*segment->memid,*/ tld->stats); } @@ -301,7 +299,7 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { mi_segment_t* segment = mi_segment_cache_pop(0,tld); mi_assert_internal(segment != NULL); - if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); + if (segment != NULL) mi_segment_os_free(segment, tld); } return true; } @@ -326,7 +324,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) void _mi_segment_thread_collect(mi_segments_tld_t* tld) { mi_segment_t* segment; while ((segment = mi_segment_cache_pop(0,tld)) != NULL) { - mi_segment_os_free(segment, segment->segment_size, tld); + mi_segment_os_free(segment, tld); } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); @@ 
-347,6 +345,10 @@ static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { return &segment->slices[segment->slice_count-1]; } +static size_t mi_slices_in(size_t size) { + return (size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; +} + /* ----------------------------------------------------------- Page management ----------------------------------------------------------- */ @@ -354,7 +356,7 @@ static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_count); - mi_page_queue_t* pq = (slice_count > MI_SLICES_PER_SEGMENT ? NULL : mi_page_queue_for(slice_count,tld)); + mi_page_queue_t* pq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_page_queue_for(slice_count,tld)); if (slice_count==0) slice_count = 1; mi_assert_internal(slice_index + slice_count - 1 < segment->slice_count); @@ -387,6 +389,7 @@ static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segmen mi_assert_internal(page->block_size > 0); // no more in free queue if (page->slice_count <= slice_count) return; mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; size_t next_count = page->slice_count - slice_count; mi_segment_page_init( segment, next_index, next_count, tld ); @@ -394,6 +397,7 @@ static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segmen } static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_SIZE_MAX); // search from best fit up mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); if (slice_count == 0) slice_count = 1; @@ -417,7 +421,7 @@ static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tl static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); - if (slice->slice_count > MI_SLICES_PER_SEGMENT) return; // huge page + mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); mi_page_queue_delete(pq, mi_slice_to_page(slice)); } @@ -434,8 +438,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m size_t info_size; size_t pre_size; size_t segment_size = mi_segment_size(required, &pre_size, &info_size); - size_t slice_count = segment_size / MI_SEGMENT_SLICE_SIZE; - mi_assert_internal(segment_size >= required); + size_t slice_count = mi_slices_in(segment_size); + if (slice_count > MI_SLICES_PER_SEGMENT) slice_count = MI_SLICES_PER_SEGMENT; + mi_assert_internal(segment_size - _mi_align_up(sizeof(mi_segment_t),MI_SEGMENT_SLICE_SIZE) >= required); + mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); // Try to get it from our thread local cache first @@ -514,7 +520,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // Remove the free pages mi_slice_t* slice = &segment->slices[0]; size_t page_count = 0; - while (slice < mi_segment_last_slice(segment)) { + while (slice <= mi_segment_last_slice(segment)) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); 
mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. @@ -534,7 +540,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } else { // otherwise return it to the OS - mi_segment_os_free(segment, segment->segment_size, tld); + mi_segment_os_free(segment, tld); } } @@ -657,7 +663,15 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld page->block_size = 1; // and free it - return mi_segment_page_free_coalesce(page, tld); + if (segment->kind != MI_SEGMENT_HUGE) { + return mi_segment_page_free_coalesce(page, tld); + } + else { + mi_assert_internal(segment->used == 1); + segment->used--; + page->block_size = 0; // pretend free + return mi_page_to_slice(page); + } } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -699,7 +713,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the free pages from our lists mi_slice_t* slice = &segment->slices[0]; - while (slice < mi_segment_last_slice(segment)) { + while (slice <= mi_segment_last_slice(segment)) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); if (slice->block_size == 0) { // a free page @@ -825,12 +839,13 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_assert_internal(page->block_size > 0 && page->slice_count > 0); size_t initial_count = page->slice_count; page = page + initial_count; - page->slice_count = segment->slice_count - initial_count; + page->slice_count = (segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE; page->slice_offset = 0; page->block_size = size; mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); + mi_assert_internal(page->slice_count >= segment->slice_count - initial_count); // set back pointers - for (size_t i = 1; i < page->slice_count; i++) { + for (size_t i = 1; i slice_count; i++) { mi_slice_t* slice = (mi_slice_t*)(page + i); slice->slice_offset = (uint16_t)i; slice->block_size = 1; From a0b4ac2f66f36a117b69ec3d45b55b771fdbecbc Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 15 Aug 2019 23:19:52 -0700 Subject: [PATCH 005/352] new segment allocation; good results with Qas service --- include/mimalloc-internal.h | 22 ++++++++++---------- include/mimalloc-types.h | 8 ++++---- src/page.c | 4 ++-- src/segment.c | 40 +++++++++++++++++++++---------------- 4 files changed, 40 insertions(+), 34 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e8fa1ba1..3aee4ae1 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -254,20 +254,21 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { return (mi_slice_t*)(p); } -static size_t mi_slice_index(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); - return index; -} - // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { - mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || page == mi_slice_to_page(&segment->slices[mi_slice_index(mi_page_to_slice((mi_page_t*)page))])); + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment == NULL || (mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_count); return segment; } +static inline 
mi_slice_t* mi_slice_first(const mi_slice_t* slice) { + mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); + mi_assert_internal(start >= _mi_ptr_segment(slice)->slices); + mi_assert_internal(start->slice_offset == 0); + mi_assert_internal(start + start->slice_count > slice); + return start; +} + // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; @@ -275,8 +276,7 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; mi_assert_internal(idx < segment->slice_count); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; - mi_slice_t* slice = slice0 - slice0->slice_offset; // adjust to the block that holds the page data - mi_assert_internal(slice->slice_count > slice0->slice_offset); + mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count); return mi_slice_to_page(slice); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f4042e60..7a240b7e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -78,7 +78,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 512kb +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 1024kb // Derived constants @@ -90,7 +90,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SMALL_PAGE_SIZE (1<thread_free) != NULL) { // quick test to avoid an atomic operation _mi_page_thread_free_collect(page); @@ -703,7 +703,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { General allocation ----------------------------------------------------------- */ -// A huge page is allocated directly without being in a queue +// Large and huge pages are allocated directly without being in a queue static mi_page_t* mi_large_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t); mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); diff --git a/src/segment.c b/src/segment.c index e6eb0b08..fd16e2e9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -74,6 +74,13 @@ static size_t mi_slice_bin(size_t slice_count) { return bin; } +static size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + return index; +} + /* ----------------------------------------------------------- Page Queues @@ -98,7 +105,7 @@ static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) } */ -static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { +static void mi_page_queue_enqueue(mi_page_queue_t* pq, mi_page_t* page) { // todo: or push to the end? 
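The mi_slice_first helper above is the key to interior-pointer lookups: every non-head slice stores, in slice_offset, the byte distance back to the head slice of its span. A standalone toy model of that back-pointer walk, with hypothetical demo types and names, assuming the byte-offset encoding this commit switches to:

/* resolve an interior slice to the head slice of its span (sketch only) */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct demo_slice_s {
  uint32_t slice_count;    // > 0 only on the head slice of a span
  uint32_t slice_offset;   // byte distance back to the head slice
} demo_slice_t;

static demo_slice_t* demo_slice_first(demo_slice_t* slice) {
  demo_slice_t* start = (demo_slice_t*)((uint8_t*)slice - slice->slice_offset);
  assert(start->slice_offset == 0 && start->slice_count > 0);
  return start;
}

int main(void) {
  demo_slice_t slices[4] = {{0, 0}};
  slices[0].slice_count = 4;   // one span covering all four slices
  for (uint32_t i = 1; i < 4; i++) {
    slices[i].slice_offset = (uint32_t)(i * sizeof(demo_slice_t));
  }
  printf("head index: %td\n", demo_slice_first(&slices[3]) - slices);  // 0
  return 0;
}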
mi_assert_internal(page->prev == NULL && page->next==NULL); page->prev = NULL; // paranoia @@ -158,14 +165,14 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { if (slice->block_size > 0) { // a page in use, all slices need their back offset set used_count++; for (size_t i = index; i <= maxindex; i++) { - mi_assert_internal(segment->slices[i].slice_offset == i - index); + mi_assert_internal(segment->slices[i].slice_offset == (i - index)*sizeof(mi_page_t)); mi_assert_internal(i==index || segment->slices[i].slice_count == 0); mi_assert_internal(i==index || segment->slices[i].block_size == 1); } } else { // free range of slices; only last slice needs a valid back offset mi_slice_t* end = &segment->slices[maxindex]; - mi_assert_internal(slice == end - end->slice_offset); + mi_assert_internal((uint8_t*)slice == (uint8_t*)end - end->slice_offset); mi_assert_internal(slice == end || end->slice_count == 0 ); mi_assert_internal(end->block_size == 0); if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { @@ -272,7 +279,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, // and no more than 1. -#define MI_SEGMENT_CACHE_MAX (2) +#define MI_SEGMENT_CACHE_MAX (4) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -362,16 +369,16 @@ static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size // set first and last slice (the intermediates can be undetermined) mi_slice_t* slice = &segment->slices[slice_index]; - slice->slice_count = slice_count; + slice->slice_count = (uint32_t)slice_count; slice->slice_offset = 0; if (slice_count > 1) { mi_slice_t* end = &segment->slices[slice_index + slice_count - 1]; end->slice_count = 0; - end->slice_offset = (uint16_t)slice_count - 1; + end->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); end->block_size = 0; } // and push it on the free page queue (if it was not a huge page) - if (pq != NULL) mi_page_queue_push( pq, mi_slice_to_page(slice) ); + if (pq != NULL) mi_page_queue_enqueue( pq, mi_slice_to_page(slice) ); else slice->block_size = 0; // mark huge page as free anyways } @@ -393,7 +400,7 @@ static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segmen size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; size_t next_count = page->slice_count - slice_count; mi_segment_page_init( segment, next_index, next_count, tld ); - page->slice_count = slice_count; + page->slice_count = (uint32_t)slice_count; } static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { @@ -494,11 +501,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m for (size_t i = 0; i < islice_count; i++) { mi_slice_t* slice = &segment->slices[i]; if (i==0) { - slice->slice_count = islice_count; + slice->slice_count = (uint32_t)islice_count; slice->block_size = islice_count * MI_SEGMENT_SLICE_SIZE; } else { - slice->slice_offset = (uint16_t)i; + slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); slice->block_size = 1; } } @@ -553,7 +560,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require mi_assert_internal(required <= MI_LARGE_SIZE_MAX && page_kind <= MI_PAGE_LARGE); // find a free page - size_t page_size = _mi_align_up(required,MI_SEGMENT_SLICE_SIZE); + size_t page_size = _mi_align_up(required,(required > 
MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); if (page==NULL) { @@ -569,7 +576,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require bool commit = false; bool unreset = false; for (size_t i = 0; i < page->slice_count; i++, slice++) { - slice->slice_offset = (uint16_t)i; + slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); slice->block_size = 1; if (i > 0) slice->slice_count = 0; if (!segment->all_committed && !slice->is_committed) { @@ -610,8 +617,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl mi_segment_page_delete(next, tld); } if (slice > segment->slices) { - mi_slice_t* prev = slice - 1; - prev = prev - prev->slice_offset; + mi_slice_t* prev = mi_slice_first(slice - 1); mi_assert_internal(prev >= segment->slices); if (prev->block_size==0) { // free previous slice -- remove it from free and merge @@ -653,7 +659,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld } // zero the page data - size_t slice_count = page->slice_count; // don't clear the slice_count + uint32_t slice_count = page->slice_count; // don't clear the slice_count bool is_reset = page->is_reset; // don't clear the reset flag bool is_committed = page->is_committed; // don't clear the commit flag memset(page, 0, sizeof(*page)); @@ -839,7 +845,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_assert_internal(page->block_size > 0 && page->slice_count > 0); size_t initial_count = page->slice_count; page = page + initial_count; - page->slice_count = (segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE; + page->slice_count = (uint32_t)((segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE); page->slice_offset = 0; page->block_size = size; mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); @@ -847,7 +853,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld // set back pointers for (size_t i = 1; i slice_count; i++) { mi_slice_t* slice = (mi_slice_t*)(page + i); - slice->slice_offset = (uint16_t)i; + slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); slice->block_size = 1; slice->slice_count = 0; } From 91497e8d2d797ab8b9d55c9618b36a9f3f88dcd9 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 16 Aug 2019 17:49:49 -0700 Subject: [PATCH 006/352] whitespace and warning fix --- include/mimalloc-internal.h | 18 ++++----- src/segment.c | 78 ++++++++++++++++++------------------- 2 files changed, 47 insertions(+), 49 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 3aee4ae1..69c150c2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -17,7 +17,7 @@ terms of the MIT license. A copy of the license can be found in the file #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else -#define mi_trace_message(...) +#define mi_trace_message(...) 
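To make the size rounding at the top of mi_segment_page_alloc concrete: requests up to a medium page are rounded to whole slices, larger ones to whole medium pages, which bounds the number of distinct span sizes. A small sketch with assumed values (64KiB slices, 512KiB medium pages; align_up_demo mirrors _mi_align_up but is not the patch's code):

/* rounding of a required size to a page/span size (sketch only) */
#include <stdio.h>
#include <stddef.h>

#define DEMO_SLICE_SIZE  ((size_t)64 * 1024)
#define DEMO_MEDIUM_SIZE ((size_t)512 * 1024)

static size_t align_up_demo(size_t n, size_t alignment) {
  return ((n + alignment - 1) / alignment) * alignment;
}

static size_t demo_page_size_for(size_t required) {
  return align_up_demo(required,
      required > DEMO_MEDIUM_SIZE ? DEMO_MEDIUM_SIZE : DEMO_SLICE_SIZE);
}

int main(void) {
  printf("%zu KiB\n", demo_page_size_for(100 * 1024) / 1024);  // 128 KiB
  printf("%zu KiB\n", demo_page_size_for(600 * 1024) / 1024);  // 1024 KiB
  return 0;
}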
#endif @@ -144,8 +144,8 @@ bool _mi_page_is_valid(mi_page_t* page); Inlined definitions ----------------------------------------------------------- */ #define UNUSED(x) (void)(x) -#if (MI_DEBUG>0) -#define UNUSED_RELEASE(x) +#if (MI_DEBUG>0) +#define UNUSED_RELEASE(x) #else #define UNUSED_RELEASE(x) UNUSED(x) #endif @@ -256,13 +256,13 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { - mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || (mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_count); + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_count)); return segment; } static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { - mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); + mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); mi_assert_internal(start >= _mi_ptr_segment(slice)->slices); mi_assert_internal(start->slice_offset == 0); mi_assert_internal(start + start->slice_count > slice); @@ -275,8 +275,8 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; mi_assert_internal(idx < segment->slice_count); - mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; - mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data + mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; + mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count); return mi_slice_to_page(slice); @@ -354,7 +354,7 @@ static inline uintptr_t mi_page_thread_id(const mi_page_t* page) { } static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) { - page->flags = thread_id; + page->flags = thread_id; } static inline bool mi_page_is_in_full(const mi_page_t* page) { diff --git a/src/segment.c b/src/segment.c index fd16e2e9..31fbccf9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -21,7 +21,7 @@ static void mi_segment_map_freed_at(const mi_segment_t* segment); /* ----------------------------------------------------------- Segment allocation - + In any case the memory for a segment is virtual and only committed on demand (i.e. 
we are careful to not touch the memory @@ -113,7 +113,7 @@ static void mi_page_queue_enqueue(mi_page_queue_t* pq, mi_page_t* page) { pq->first = page; if (page->next != NULL) page->next->prev = page; else pq->last = page; - page->block_size = 0; // free + page->block_size = 0; // free } static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { @@ -141,7 +141,7 @@ static void mi_page_queue_delete(mi_page_queue_t* pq, mi_page_t* page) { ----------------------------------------------------------- */ #if (MI_DEBUG > 1) -static bool mi_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { +static bool mi_segment_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { for (mi_page_t* p = pq->first; p != NULL; p = p->next) { if (p==page) return true; } @@ -159,7 +159,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_page_queue_t* pq; while(slice < &segment->slices[segment->slice_count]) { mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(slice->slice_offset == 0); size_t index = mi_slice_index(slice); size_t maxindex = (index + slice->slice_count >= segment->slice_count ? segment->slice_count : index + slice->slice_count) - 1; if (slice->block_size > 0) { // a page in use, all slices need their back offset set @@ -177,9 +177,9 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(end->block_size == 0); if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { pq = mi_page_queue_for(slice->slice_count,tld); - mi_assert_internal(mi_page_queue_contains(pq,mi_slice_to_page(slice))); + mi_assert_internal(mi_segment_page_queue_contains(pq,mi_slice_to_page(slice))); } - } + } slice = &segment->slices[maxindex+1]; } mi_assert_internal(slice == &segment->slices[segment->slice_count]); @@ -193,7 +193,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Start of the page available memory; can be used on uninitialized pages -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); ptrdiff_t idx = slice - segment->slices; @@ -233,7 +233,7 @@ static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_si size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; - + if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) @@ -296,7 +296,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t } static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->cache_count < MI_SEGMENT_CACHE_MAX + if (tld->cache_count < MI_SEGMENT_CACHE_MAX && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) ) { // always allow 1 element cache return false; @@ -339,7 +339,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { /* ----------------------------------------------------------- - Slices + Slices ----------------------------------------------------------- */ @@ -399,11 +399,11 @@ static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segmen 
mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; size_t next_count = page->slice_count - slice_count; - mi_segment_page_init( segment, next_index, next_count, tld ); + mi_segment_page_init( segment, next_index, next_count, tld ); page->slice_count = (uint32_t)slice_count; } -static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { +static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_SIZE_MAX); // search from best fit up mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); @@ -422,8 +422,8 @@ static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tl } pq++; } - // could not find a page.. - return NULL; + // could not find a page.. + return NULL; } static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { @@ -452,7 +452,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) + bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) || required > 0; // huge page mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment==NULL) { @@ -482,7 +482,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m _mi_os_protect((uint8_t*)segment + info_size, (pre_size - info_size)); size_t os_page_size = _mi_os_page_size(); // and protect the last page too - _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); slice_count--; // don't use the last slice :-( } @@ -519,7 +519,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - mi_assert_internal(segment != NULL); + mi_assert_internal(segment != NULL); mi_assert_internal(segment->next == NULL); mi_assert_internal(segment->prev == NULL); mi_assert_internal(segment->used == 0); @@ -541,7 +541,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // stats _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -555,7 +555,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t Page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(required <= MI_LARGE_SIZE_MAX && page_kind <= MI_PAGE_LARGE); @@ -565,7 +565,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again - if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM + if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM return mi_segment_page_alloc(page_kind, required, tld, os_tld); } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); @@ -581,11 +581,11 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require if (i > 0) slice->slice_count = 0; if (!segment->all_committed && !slice->is_committed) { slice->is_committed = true; - commit = true; + commit = true; } if (slice->is_reset) { slice->is_reset = false; - unreset = true; + unreset = true; } } uint8_t* page_start = mi_slice_start(mi_page_to_slice(page)); @@ -593,7 +593,7 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t require if(unreset){ _mi_os_unreset(page_start, page_size, tld->stats); } // initialize the page and return - mi_assert_internal(segment->thread_id == _mi_thread_id()); + mi_assert_internal(segment->thread_id == _mi_thread_id()); segment->used++; mi_page_init_flags(page, segment->thread_id); return page; @@ -604,7 +604,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->used > 0); segment->used--; - + // free and coalesce the page mi_slice_t* slice = mi_page_to_slice(page); size_t slice_count = slice->slice_count; @@ -617,7 +617,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl mi_segment_page_delete(next, tld); } if (slice > segment->slices) { - mi_slice_t* prev = mi_slice_first(slice - 1); + mi_slice_t* prev = mi_slice_first(slice - 1); mi_assert_internal(prev >= segment->slices); if (prev->block_size==0) { // free previous slice -- remove it from free and merge @@ -627,7 +627,7 @@ static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tl slice = prev; } } - + // and add the new free page mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); mi_assert_expensive(mi_segment_is_valid(segment,tld)); @@ -649,7 +649,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - + // reset the page memory to reduce memory pressure? 
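One detail worth spelling out from the allocation path above: commit and unreset decisions are accumulated per slice, and the actual OS call is then issued once for the whole page range. A standalone sketch of that accumulation (demo types and names are hypothetical, not the patch's API):

/* accumulate per-slice commit/unreset needs for one page range (sketch only) */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct demo_slice_state_s {
  bool is_committed;
  bool is_reset;
} demo_slice_state_t;

static void demo_plan_page_ops(demo_slice_state_t* slices, size_t count,
                               bool all_committed,
                               bool* do_commit, bool* do_unreset) {
  *do_commit = false;
  *do_unreset = false;
  for (size_t i = 0; i < count; i++) {
    if (!all_committed && !slices[i].is_committed) {
      slices[i].is_committed = true;
      *do_commit = true;     // at least one slice still needs committing
    }
    if (slices[i].is_reset) {
      slices[i].is_reset = false;
      *do_unreset = true;    // at least one slice had been reset
    }
  }
}

int main(void) {
  demo_slice_state_t s[3] = { {true, false}, {false, true}, {false, false} };
  bool commit, unreset;
  demo_plan_page_ops(s, 3, false, &commit, &unreset);
  printf("commit=%d unreset=%d\n", commit, unreset);   // commit=1 unreset=1
  return 0;
}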
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; @@ -696,7 +696,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) else if (segment->used == segment->abandoned) { // only abandoned pages; remove from free list and abandon mi_segment_abandon(segment,tld); - } + } } @@ -718,7 +718,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the free pages from our lists - mi_slice_t* slice = &segment->slices[0]; + mi_slice_t* slice = &segment->slices[0]; while (slice <= mi_segment_last_slice(segment)) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); @@ -807,14 +807,14 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen segment->abandoned--; if (mi_page_all_free(page)) { // if everything free by now, free the page - slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing + slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing } else { // otherwise reclaim it mi_page_init_flags(page, segment->thread_id); _mi_page_reclaim(heap, page); } - } + } mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); slice = slice + slice->slice_count; } @@ -824,7 +824,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_free(segment,false,tld); } else { - reclaimed++; + reclaimed++; } } return (reclaimed>0); @@ -847,15 +847,15 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld page = page + initial_count; page->slice_count = (uint32_t)((segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE); page->slice_offset = 0; - page->block_size = size; + page->block_size = size; mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); mi_assert_internal(page->slice_count >= segment->slice_count - initial_count); - // set back pointers + // set back pointers for (size_t i = 1; i slice_count; i++) { mi_slice_t* slice = (mi_slice_t*)(page + i); slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); slice->block_size = 1; - slice->slice_count = 0; + slice->slice_count = 0; } mi_page_init_flags(page,segment->thread_id); return page; @@ -899,7 +899,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ ----------------------------------------------------------- */ #if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)1 << 44) // 16TB +#define MI_MAX_ADDRESS ((size_t)1 << 44) // 16TB #else #define MI_MAX_ADDRESS ((size_t)1 << 31) // 2Gb #endif @@ -911,7 +911,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 1KiB per TB with 128MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB? + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB? 
uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; *bitidx = segindex % (8*MI_INTPTR_SIZE); return (segindex / (8*MI_INTPTR_SIZE)); @@ -953,7 +953,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { return segment; // yes, allocated by us } if (index==0) return NULL; - // search downwards for the first segment in case it is an interior pointer + // search downwards for the first segment in case it is an interior pointer // could be slow but searches in 256MiB steps trough valid huge objects // note: we could maintain a lowest index to speed up the path for invalid pointers? size_t lobitidx; @@ -1006,5 +1006,3 @@ static void* mi_segment_range_of(const void* p, size_t* size) { } } */ - - From cce38bc147b6290607d086bc1126946dd0cf9ab1 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 24 Aug 2019 07:32:23 -0700 Subject: [PATCH 007/352] more conservative setting to avoid internal fragmentation --- include/mimalloc-types.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9a482aff..77e9e4ab 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -78,7 +78,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 1024kb +#define MI_MEDIUM_PAGE_SHIFT ( 2 + MI_SEGMENT_SLICE_SHIFT) // 512kb // Derived constants @@ -90,12 +90,12 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SMALL_PAGE_SIZE (1<0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0) - void* padding[1]; // 12 words on 64-bit in secure mode, 12 words on 32-bit plain + // without padding: 11 words on 64-bit, 13 on 32-bit. Secure adds one word + #if (MI_SECURE==0) + void* padding[1]; // 12 words on 64-bit, 14 words on 32-bit #endif } mi_page_t; @@ -212,15 +212,14 @@ typedef mi_page_t mi_slice_t; // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - struct mi_segment_s* next; - struct mi_segment_s* prev; - volatile struct mi_segment_s* abandoned_next; - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t used; // count of pages in use - size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` + struct mi_segment_s* next; // the list of freed segments in the cache + volatile struct mi_segment_s* abandoned_next; // the list of abandoned segments + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t used; // count of pages in use + size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. 
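The segment map above keeps one bit per possible segment-aligned address below MI_MAX_ADDRESS, so a segment pointer reduces to a word index plus a bit index. A standalone sketch of that arithmetic, assuming the 16TB cap and 64MiB segment size suggested by the constants in this series (names are illustrative):

/* word/bit addressing into a segment bitmap (sketch only) */
#include <stdint.h>
#include <stdio.h>

#define DEMO_MAX_ADDRESS   ((uintptr_t)1 << 44)   // 16TB
#define DEMO_SEGMENT_SIZE  ((uintptr_t)1 << 26)   // 64MiB
#define DEMO_BITS_PER_WORD (8 * sizeof(uintptr_t))

static size_t demo_map_index_of(uintptr_t segment_addr, size_t* bitidx) {
  uintptr_t segindex = (segment_addr % DEMO_MAX_ADDRESS) / DEMO_SEGMENT_SIZE;
  *bitidx = (size_t)(segindex % DEMO_BITS_PER_WORD);
  return (size_t)(segindex / DEMO_BITS_PER_WORD);
}

int main(void) {
  size_t bitidx;
  size_t word = demo_map_index_of((uintptr_t)3 * DEMO_SEGMENT_SIZE, &bitidx);
  printf("word %zu, bit %zu\n", word, bitidx);   // word 0, bit 3
  return 0;
}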
- uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - size_t memid; // id for the os-level memory manager + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + size_t memid; // id for the os-level memory manager bool all_committed; // layout like this to optimize access in `mi_free` From 612b2cc9b764783dbb3b52aeb47b35eab405e6db Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 24 Aug 2019 12:20:32 -0700 Subject: [PATCH 008/352] clean up segment slice handling --- include/mimalloc-internal.h | 6 +- include/mimalloc-types.h | 44 ++- src/init.c | 39 ++- src/segment.c | 592 ++++++++++++++++++------------------ test/test-stress.c | 6 +- 5 files changed, 352 insertions(+), 335 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 7566baa0..bf5c2e04 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -259,7 +259,7 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_count)); + mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries)); return segment; } @@ -276,11 +276,11 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx < segment->slice_count); + mi_assert_internal(idx < segment->slice_entries); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); - mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count); + mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries); return mi_slice_to_page(slice); } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 77e9e4ab..78b643ad 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -78,7 +78,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 2 + MI_SEGMENT_SLICE_SHIFT) // 512kb +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb // Derived constants @@ -109,6 +109,9 @@ terms of the MIT license. A copy of the license can be found in the file #error "define more bins" #endif +// Maximum slice offset (7) +#define MI_MAX_SLICE_OFFSET ((MI_MEDIUM_PAGE_SIZE / MI_SEGMENT_SLICE_SIZE) - 1) + typedef uintptr_t mi_encoded_t; // free lists contain blocks @@ -206,6 +209,12 @@ typedef enum mi_segment_kind_e { MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. 
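For reference, the MI_MAX_SLICE_OFFSET introduced above works out to 7 with the page sizes used in this series: a medium page spans 8 slices, so at most 7 interior slices sit between an address and the head slice of a used page. A trivial check under those assumed sizes:

/* MI_MAX_SLICE_OFFSET arithmetic under assumed sizes (sketch only) */
#include <stdio.h>
#include <stddef.h>

#define DEMO_SLICE_SIZE       ((size_t)64 * 1024)
#define DEMO_MEDIUM_PAGE_SIZE ((size_t)512 * 1024)
#define DEMO_MAX_SLICE_OFFSET ((DEMO_MEDIUM_PAGE_SIZE / DEMO_SLICE_SIZE) - 1)

int main(void) {
  printf("max slice offset: %zu\n", (size_t)DEMO_MAX_SLICE_OFFSET);  // 7
  return 0;
}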
} mi_segment_kind_t; +#define MI_COMMIT_SIZE ((size_t)2 << 20) // OS large page size + +#if ((MI_SEGMENT_SIZE / MI_COMMIT_SIZE) > MI_INTPTR_SIZE) +#error "not enough commit bits to cover the segment size" +#endif + typedef mi_page_t mi_slice_t; // Segments are large allocated memory blocks (2mb on 64 bit) from @@ -214,18 +223,21 @@ typedef mi_page_t mi_slice_t; typedef struct mi_segment_s { struct mi_segment_s* next; // the list of freed segments in the cache volatile struct mi_segment_s* abandoned_next; // the list of abandoned segments - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t used; // count of pages in use - size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` - size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - size_t memid; // id for the os-level memory manager - bool all_committed; + + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t used; // count of pages in use + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + + size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` + size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. + + bool allow_decommit; + uintptr_t commit_mask; // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; uintptr_t thread_id; - size_t slice_count; // slices in this segment (at most MI_SLICES_PER_SEGMENT) + size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` mi_slice_t slices[MI_SLICES_PER_SEGMENT]; } mi_segment_t; @@ -371,17 +383,19 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // Thread Local data // ------------------------------------------------------ -// Queue of segments -typedef struct mi_segment_queue_s { - mi_segment_t* first; - mi_segment_t* last; -} mi_segment_queue_t; +// A "span" is is an available range of slices. The span queues keep +// track of slice spans of at most the given `slice_count` (but more than the previous size class). 
+typedef struct mi_span_queue_s { + mi_slice_t* first; + mi_slice_t* last; + size_t slice_count; +} mi_span_queue_t; #define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) // Segments thread local data typedef struct mi_segments_tld_s { - mi_page_queue_t pages[MI_SEGMENT_BIN_MAX+1]; // free pages inside segments + mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/init.c b/src/init.c index b0fa60cc..d4ba5572 100644 --- a/src/init.c +++ b/src/init.c @@ -21,7 +21,7 @@ const mi_page_t _mi_page_empty = { 0, // used NULL, 0, 0, 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0) + #if (MI_SECURE==0) , { NULL } // padding #endif }; @@ -68,6 +68,18 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() + +// Empty slice span queues for every bin +#define SQNULL(sz) { NULL, NULL, sz } +#define MI_SEGMENT_SPAN_QUEUES_EMPTY \ + { SQNULL(1), \ + SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ + SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ + SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ + SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ + SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } + + // -------------------------------------------------------- // Statically allocate an empty heap as the initial // thread local value for the default heap, @@ -89,25 +101,26 @@ const mi_heap_t _mi_heap_empty = { false }; +#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats))) + +static const mi_tld_t tld_empty = { + 0, + NULL, + { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_empty_stats }, // segments + { 0, tld_empty_stats }, // os + { MI_STATS_NULL } // stats +}; + mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; -// Empty page queues for every bin -#define SQNULL(sz) { NULL, NULL, sz } -#define MI_SEGMENT_PAGE_QUEUES_EMPTY \ - { SQNULL(1), \ - SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ - SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ - SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ - SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ - SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) static mi_tld_t tld_main = { 0, &_mi_heap_main, - { MI_SEGMENT_PAGE_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -223,12 +236,12 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; + memcpy(tld, &tld_empty, sizeof(*tld)); memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); 
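One way to picture the MI_SEGMENT_SPAN_QUEUES_EMPTY table above: a free span of n slices belongs to the first queue whose size class is at least n, and the best-fit search walks upward from there. The sketch below scans the same size-class table; the patch computes the bin directly via mi_slice_bin rather than scanning, so this is only an illustration:

/* map a slice count to its size-class index by scanning (sketch only) */
#include <stddef.h>
#include <stdio.h>

static const size_t demo_span_bins[] = {
  1, 1, 2, 3, 4, 5, 6, 7, 10,
  12, 14, 16, 20, 24, 28, 32, 40,
  48, 56, 64, 80, 96, 112, 128, 160,
  192, 224, 256, 320, 384, 448, 512, 640,
  768, 896, 1024
};
#define DEMO_BIN_COUNT (sizeof(demo_span_bins)/sizeof(demo_span_bins[0]))

static size_t demo_bin_for(size_t slice_count) {
  size_t bin = 0;
  while (bin + 1 < DEMO_BIN_COUNT && demo_span_bins[bin] < slice_count) bin++;
  return bin;   // first class that can hold `slice_count` slices
}

int main(void) {
  printf("%zu\n", demo_bin_for(9));     // 8  -> class of 10 slices
  printf("%zu\n", demo_bin_for(1024));  // 35 -> largest class
  return 0;
}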
heap->random = _mi_random_init(heap->thread_id); heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; - heap->tld = tld; - memset(tld, 0, sizeof(*tld)); + heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; tld->os.stats = &tld->stats; diff --git a/src/segment.c b/src/segment.c index 64b9f4ac..3b3272a1 100644 --- a/src/segment.c +++ b/src/segment.c @@ -31,6 +31,27 @@ static void mi_segment_map_freed_at(const mi_segment_t* segment); be reclaimed by still running threads, much like work-stealing. ----------------------------------------------------------- */ +/* ----------------------------------------------------------- + Slices +----------------------------------------------------------- */ + +static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) { + return &segment->slices[segment->slice_entries]; +} + +/* +static uint8_t* mi_slice_start(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment)); + return ((uint8_t*)segment + ((slice - segment->slices)*MI_SEGMENT_SLICE_SIZE)); +} + + +static size_t mi_slices_in(size_t size) { + return (size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; +} +*/ + /* ----------------------------------------------------------- Bins ----------------------------------------------------------- */ @@ -67,7 +88,7 @@ static size_t mi_slice_bin8(size_t slice_count) { static size_t mi_slice_bin(size_t slice_count) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); - mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) == MI_SEGMENT_BIN_MAX); + mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) <= MI_SEGMENT_BIN_MAX); size_t bin = (slice_count==0 ? 0 : mi_slice_bin8(slice_count)); mi_assert_internal(bin <= MI_SEGMENT_BIN_MAX); return bin; @@ -76,62 +97,43 @@ static size_t mi_slice_bin(size_t slice_count) { static size_t mi_slice_index(const mi_slice_t* slice) { mi_segment_t* segment = _mi_ptr_segment(slice); ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count); + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_entries); return index; } /* ----------------------------------------------------------- - Page Queues + Slice span queues ----------------------------------------------------------- */ -/* -static bool mi_page_queue_is_empty(mi_page_queue_t* pq) { - return (pq->first == NULL); -} -static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) -{ - mi_page_t* page = pq->first; - if (page==NULL) return NULL; - mi_assert_internal(page->prev==NULL); - pq->first = page->next; - if (page->next == NULL) pq->last = NULL; - else page->next->prev = NULL; - page->next = NULL; - page->prev = NULL; // paranoia - page->block_size = 1; // no more free - return page; -} -*/ - -static void mi_page_queue_enqueue(mi_page_queue_t* pq, mi_page_t* page) { +static void mi_span_queue_push(mi_span_queue_t* sq, mi_slice_t* slice) { // todo: or push to the end? 
- mi_assert_internal(page->prev == NULL && page->next==NULL); - page->prev = NULL; // paranoia - page->next = pq->first; - pq->first = page; - if (page->next != NULL) page->next->prev = page; - else pq->last = page; - page->block_size = 0; // free + mi_assert_internal(slice->prev == NULL && slice->next==NULL); + slice->prev = NULL; // paranoia + slice->next = sq->first; + sq->first = slice; + if (slice->next != NULL) slice->next->prev = slice; + else sq->last = slice; + slice->block_size = 0; // free } -static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { +static mi_span_queue_t* mi_span_queue_for(size_t slice_count, mi_segments_tld_t* tld) { size_t bin = mi_slice_bin(slice_count); - mi_page_queue_t* pq = &tld->pages[bin]; - // mi_assert_internal(pq->block_size >= slice_count); - return pq; + mi_span_queue_t* sq = &tld->spans[bin]; + mi_assert_internal(sq->slice_count >= slice_count); + return sq; } -static void mi_page_queue_delete(mi_page_queue_t* pq, mi_page_t* page) { - mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); - // should work too if the queue does not contain page (which can happen during reclaim) - if (page->prev != NULL) page->prev->next = page->next; - if (page == pq->first) pq->first = page->next; - if (page->next != NULL) page->next->prev = page->prev; - if (page == pq->last) pq->last = page->prev; - page->prev = NULL; - page->next = NULL; - page->block_size = 1; // no more free +static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) { + mi_assert_internal(slice->block_size==0 && slice->slice_count>0 && slice->slice_offset==0); + // should work too if the queue does not contain slice (which can happen during reclaim) + if (slice->prev != NULL) slice->prev->next = slice->next; + if (slice == sq->first) sq->first = slice->next; + if (slice->next != NULL) slice->next->prev = slice->prev; + if (slice == sq->last) sq->last = slice->prev; + slice->prev = NULL; + slice->next = NULL; + slice->block_size = 1; // no more free } @@ -140,9 +142,9 @@ static void mi_page_queue_delete(mi_page_queue_t* pq, mi_page_t* page) { ----------------------------------------------------------- */ #if (MI_DEBUG > 1) -static bool mi_segment_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { - for (mi_page_t* p = pq->first; p != NULL; p = p->next) { - if (p==page) return true; +static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) { + for (mi_slice_t* s = sq->first; s != NULL; s = s->next) { + if (s==slice) return true; } return false; } @@ -154,34 +156,42 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); size_t used_count = 0; - mi_page_queue_t* pq; - while(slice < &segment->slices[segment->slice_count]) { + mi_span_queue_t* sq; + while(slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); size_t index = mi_slice_index(slice); - size_t maxindex = (index + slice->slice_count >= segment->slice_count ? segment->slice_count : index + slice->slice_count) - 1; - if (slice->block_size > 0) { // a page in use, all slices need their back offset set + size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? 
segment->slice_entries : index + slice->slice_count) - 1; + if (slice->block_size > 0) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets used_count++; - for (size_t i = index; i <= maxindex; i++) { - mi_assert_internal(segment->slices[i].slice_offset == (i - index)*sizeof(mi_page_t)); - mi_assert_internal(i==index || segment->slices[i].slice_count == 0); - mi_assert_internal(i==index || segment->slices[i].block_size == 1); + for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) { + mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t)); + mi_assert_internal(i==0 || segment->slices[index + i].slice_count == 0); + mi_assert_internal(i==0 || segment->slices[index + i].block_size == 1); + } + // and the last entry as well (for coalescing) + const mi_slice_t* last = slice + slice->slice_count - 1; + if (last > slice && last < mi_segment_slices_end(segment)) { + mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); + mi_assert_internal(last->slice_count == 0); + mi_assert_internal(last->block_size == 1); } } else { // free range of slices; only last slice needs a valid back offset - mi_slice_t* end = &segment->slices[maxindex]; - mi_assert_internal((uint8_t*)slice == (uint8_t*)end - end->slice_offset); - mi_assert_internal(slice == end || end->slice_count == 0 ); - mi_assert_internal(end->block_size == 0); - if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { - pq = mi_page_queue_for(slice->slice_count,tld); - mi_assert_internal(mi_segment_page_queue_contains(pq,mi_slice_to_page(slice))); + mi_slice_t* last = &segment->slices[maxindex]; + mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); + mi_assert_internal(slice == last || last->slice_count == 0 ); + mi_assert_internal(last->block_size == 0); + if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { // segment is not huge or abandonded + sq = mi_span_queue_for(slice->slice_count,tld); + mi_assert_internal(mi_span_queue_contains(sq,slice)); } } slice = &segment->slices[maxindex+1]; } - mi_assert_internal(slice == &segment->slices[segment->slice_count]); + mi_assert_internal(slice == end); mi_assert_internal(used_count == segment->used + 1); return true; } @@ -191,13 +201,20 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { Segment size calculations ----------------------------------------------------------- */ +static size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; +} +static size_t mi_segment_info_size(mi_segment_t* segment) { + return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; +} + // Start of the page available memory; can be used on uninitialized pages uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); - ptrdiff_t idx = slice - segment->slices; - size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; - uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); + const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); + ptrdiff_t idx = slice - segment->slices; + size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); /* if (idx == 0) { // the first page starts after the segment info (and possible guard page) @@ -216,7 +233,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const 
mi_page_t* pa */ long secure = mi_option_get(mi_option_secure); - if (secure > 1 || (secure == 1 && slice == &segment->slices[segment->slice_count - 1])) { + if (secure > 1 || (secure == 1 && slice == &segment->slices[segment->slice_entries - 1])) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page psize -= _mi_os_page_size(); @@ -228,7 +245,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa return p; } -static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_size) { +static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) { size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; @@ -240,12 +257,12 @@ static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_si required = _mi_align_up(required, page_size); } ; - if (info_size != NULL) *info_size = isize; - if (pre_size != NULL) *pre_size = isize + guardsize; + if (pre_size != NULL) *pre_size = isize; isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); - size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); + if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; + size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); - return segment_size; + return (segment_size / MI_SEGMENT_SLICE_SIZE); } @@ -268,11 +285,11 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; mi_segment_map_freed_at(segment); - mi_segments_track_size(-((long)segment->segment_size),tld); + mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (mi_option_is_enabled(mi_option_secure)) { - _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } - _mi_os_free(segment, segment->segment_size, /*segment->memid,*/ tld->stats); + _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); } @@ -282,14 +299,14 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset -static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t* tld) { - if (segment_size != 0 && segment_size != MI_SEGMENT_SIZE) return NULL; +static mi_segment_t* mi_segment_cache_pop(size_t segment_slices, mi_segments_tld_t* tld) { + if (segment_slices != 0 && segment_slices != MI_SLICES_PER_SEGMENT) return NULL; mi_segment_t* segment = tld->cache; if (segment == NULL) return NULL; tld->cache_count--; tld->cache = segment->next; segment->next = NULL; - mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT); _mi_stat_decrease(&tld->stats->segments_cache, 1); return segment; } @@ -312,12 +329,12 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->next == NULL); - if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) 
{ + if (segment->segment_slices != MI_SLICES_PER_SEGMENT || mi_segment_cache_full(tld)) { return false; } - mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT); if (mi_option_is_enabled(mi_option_cache_reset)) { - _mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_os_reset((uint8_t*)segment + mi_segment_info_size(segment), mi_segment_size(segment) - mi_segment_info_size(segment), tld->stats); } segment->next = tld->cache; tld->cache = segment; @@ -337,182 +354,232 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } -/* ----------------------------------------------------------- - Slices ------------------------------------------------------------ */ - - -static uint8_t* mi_slice_start(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - return ((uint8_t*)segment + (mi_slice_index(slice)*MI_SEGMENT_SLICE_SIZE)); -} - -static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { - return &segment->slices[segment->slice_count-1]; -} - -static size_t mi_slices_in(size_t size) { - return (size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; -} /* ----------------------------------------------------------- - Page management + Span management ----------------------------------------------------------- */ - -static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(slice_index < segment->slice_count); - mi_page_queue_t* pq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_page_queue_for(slice_count,tld)); +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_index < segment->slice_entries); + mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_span_queue_for(slice_count,tld)); if (slice_count==0) slice_count = 1; - mi_assert_internal(slice_index + slice_count - 1 < segment->slice_count); + mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries); // set first and last slice (the intermediates can be undetermined) mi_slice_t* slice = &segment->slices[slice_index]; slice->slice_count = (uint32_t)slice_count; + mi_assert_internal(slice->slice_count == slice_count); // no overflow? 
slice->slice_offset = 0; if (slice_count > 1) { - mi_slice_t* end = &segment->slices[slice_index + slice_count - 1]; - end->slice_count = 0; - end->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); - end->block_size = 0; + mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + last->slice_count = 0; + last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); + last->block_size = 0; } // and push it on the free page queue (if it was not a huge page) - if (pq != NULL) mi_page_queue_enqueue( pq, mi_slice_to_page(slice) ); + if (sq != NULL) mi_span_queue_push( sq, slice ); else slice->block_size = 0; // mark huge page as free anyways } -static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { - mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); - size_t slice_index = mi_slice_index(mi_page_to_slice(page)); - mi_segment_page_init(segment,slice_index,page->slice_count,tld); +// called from reclaim to add existing free spans +static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_segment_t* segment = _mi_ptr_segment(slice); + mi_assert_internal(slice->block_size==0 && slice->slice_count>0 && slice->slice_offset==0); + size_t slice_index = mi_slice_index(slice); + mi_segment_span_free(segment,slice_index,slice->slice_count,tld); +} +static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); + mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); + mi_span_queue_t* sq = mi_span_queue_for(slice->slice_count, tld); + mi_span_queue_delete(sq, slice); } -static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(page->slice_count >= slice_count); - mi_assert_internal(page->block_size > 0); // no more in free queue - if (page->slice_count <= slice_count) return; - mi_segment_t* segment = _mi_page_segment(page); +static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0 && slice->block_size > 0); + mi_segment_t* segment = _mi_ptr_segment(slice); + mi_assert_internal(segment->used > 0); + segment->used--; + + // for huge pages, just mark as free but don't add to the queues + if (segment->kind == MI_SEGMENT_HUGE) { + mi_assert_internal(segment->used == 0); + slice->block_size = 0; // mark as free anyways + return slice; + } + + // otherwise coalesce the span and add to the free span queues + size_t slice_count = slice->slice_count; + mi_slice_t* next = slice + slice->slice_count; + mi_assert_internal(next <= mi_segment_slices_end(segment)); + if (next < mi_segment_slices_end(segment) && next->block_size==0) { + // free next block -- remove it from free and merge + mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); + slice_count += next->slice_count; // extend + mi_segment_span_remove_from_queue(next, tld); + } + if (slice > segment->slices) { + mi_slice_t* prev = mi_slice_first(slice - 1); + mi_assert_internal(prev >= segment->slices); + if (prev->block_size==0) { + // free previous slice -- remove it from free and merge + mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); + slice_count += prev->slice_count; + mi_segment_span_remove_from_queue(prev, tld); + slice = prev; 
+ } + } + + // and add the new free page + mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + return slice; +} + + +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_ptr_segment(slice)==segment); + mi_assert_internal(slice->slice_count >= slice_count); + mi_assert_internal(slice->block_size > 0); // no more in free queue + if (slice->slice_count <= slice_count) return; mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; - size_t next_count = page->slice_count - slice_count; - mi_segment_page_init( segment, next_index, next_count, tld ); - page->slice_count = (uint32_t)slice_count; + size_t next_index = mi_slice_index(slice) + slice_count; + size_t next_count = slice->slice_count - slice_count; + mi_segment_span_free(segment, next_index, next_count, tld); + slice->slice_count = (uint32_t)slice_count; } -static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { + +static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count) { + mi_assert_internal(slice_index < segment->slice_entries); + mi_slice_t* slice = &segment->slices[slice_index]; + mi_assert_internal(slice->block_size==0 || slice->block_size==1); + slice->slice_offset = 0; + slice->slice_count = (uint32_t)slice_count; + mi_assert_internal(slice->slice_count == slice_count); + slice->block_size = slice_count * MI_SEGMENT_SLICE_SIZE; + mi_page_t* page = mi_slice_to_page(slice); + + // set slice back pointers for the first MI_MAX_SLICE_OFFSET entries + size_t extra = slice_count-1; + if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; + if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices + slice++; + for (size_t i = 1; i <= extra; i++, slice++) { + slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); + slice->slice_count = 0; + slice->block_size = 1; + } + + // and also for the last one (if not set already) (the last one is needed for coalescing) + mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + if (last < mi_segment_slices_end(segment) && last >= slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); + last->slice_count = 0; + last->block_size = 1; + } + + segment->used++; + return page; +} + +static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up - mi_page_queue_t* pq = mi_page_queue_for(slice_count,tld); + mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld); if (slice_count == 0) slice_count = 1; - while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX]) { - for( mi_page_t* page = pq->first; page != NULL; page = page->next) { - if (page->slice_count >= slice_count) { + while (sq <= &tld->spans[MI_SEGMENT_BIN_MAX]) { + for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) { + if (slice->slice_count >= slice_count) { // found one - mi_page_queue_delete(pq,page); - if (page->slice_count > slice_count) { - mi_segment_page_split(page,slice_count,tld); + mi_span_queue_delete(sq, slice); + mi_segment_t* segment = _mi_ptr_segment(slice); + if (slice->slice_count > 
slice_count) { + mi_segment_slice_split(segment, slice, slice_count, tld); } - mi_assert_internal(page != NULL && page->slice_count == slice_count); - return page; + mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->block_size > 0); + return mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count); } } - pq++; + sq++; } // could not find a page.. return NULL; } -static void mi_segment_page_delete(mi_slice_t* slice, mi_segments_tld_t* tld) { - mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); - mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); - mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); - mi_page_queue_delete(pq, mi_slice_to_page(slice)); -} - /* ----------------------------------------------------------- Segment allocation ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { // calculate needed sizes first - size_t info_size; + size_t info_slices; size_t pre_size; - size_t segment_size = mi_segment_size(required, &pre_size, &info_size); - size_t slice_count = mi_slices_in(segment_size); - if (slice_count > MI_SLICES_PER_SEGMENT) slice_count = MI_SLICES_PER_SEGMENT; - mi_assert_internal(segment_size - _mi_align_up(sizeof(mi_segment_t),MI_SEGMENT_SLICE_SIZE) >= required); - mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); - //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); + size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); + size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices); + size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; // Try to get it from our thread local cache first bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) || required > 0; // huge page - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); if (segment==NULL) { // Allocate the segment from the OS - size_t memid = 0; segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, /* &memid,*/ os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { - _mi_os_commit(segment, info_size, tld->stats); + _mi_os_commit(segment, info_slices*MI_SEGMENT_SLICE_SIZE, tld->stats); } - segment->memid = memid; - mi_segments_track_size((long)segment_size, tld); + mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info - { size_t memid = segment->memid; - memset(segment, 0, info_size); - segment->memid = memid; - } + // zero the segment info? 
-- not needed as it is zero initialized from the OS + // memset(segment, 0, info_size); if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data - mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_os_protect((uint8_t*)segment + info_size, (pre_size - info_size)); size_t os_page_size = _mi_os_page_size(); + size_t info_size = (info_slices * MI_SEGMENT_SLICE_SIZE); + mi_assert_internal(info_size - os_page_size >= pre_size); + _mi_os_protect((uint8_t*)segment + info_size - os_page_size, os_page_size); // and protect the last page too _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); - slice_count--; // don't use the last slice :-( + if (slice_entries == segment_slices) slice_entries--; // don't use the last slice :-( } // initialize segment info - segment->segment_size = segment_size; - segment->segment_info_size = pre_size; + segment->segment_slices = segment_slices; + segment->segment_info_slices = info_slices; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - segment->slice_count = slice_count; - segment->all_committed = commit; + segment->slice_entries = slice_entries; + segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); // reserve first slices for segment info - size_t islice_count = (segment->segment_info_size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; - for (size_t i = 0; i < islice_count; i++) { - mi_slice_t* slice = &segment->slices[i]; - if (i==0) { - slice->slice_count = (uint32_t)islice_count; - slice->block_size = islice_count * MI_SEGMENT_SLICE_SIZE; - } - else { - slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); - slice->block_size = 1; - } - } - + mi_segment_span_allocate(segment,0,info_slices); + mi_assert_internal(segment->used == 1); + segment->used = 0; // don't count our internal slices towards usage + // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page - mi_segment_page_init(segment, islice_count, segment->slice_count - islice_count, tld); + mi_assert_internal(huge_page==NULL); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, tld); } + else { + mi_assert_internal(huge_page!=NULL); + *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices); + } + return segment; } @@ -520,18 +587,18 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(segment->next == NULL); - mi_assert_internal(segment->prev == NULL); mi_assert_internal(segment->used == 0); // Remove the free pages mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); size_t page_count = 0; - while (slice <= mi_segment_last_slice(segment)) { + while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. 
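/* ---------------------------------------------------------------------------
   Illustrative sketch (not the patch's own code; slice_t and count_spans are
   hypothetical simplifications): the loop above walks a segment span by
   span -- the first slice of every span stores how many slices it covers,
   so the walk can jump straight to the next span instead of visiting each
   slice.  The sketch shows that traversal pattern on a plain array.
--------------------------------------------------------------------------- */
#include <stddef.h>
#include <stdint.h>

typedef struct slice_s {
  uint32_t slice_count;   // > 0 on the first slice of a span: span length
  uint32_t block_size;    // 0 means the span is free
} slice_t;

// Count used and free spans in a segment of `entries` slices.
static void count_spans(const slice_t* slices, size_t entries,
                        size_t* used, size_t* free_spans) {
  *used = 0; *free_spans = 0;
  size_t i = 0;
  while (i < entries) {
    const slice_t* s = &slices[i];
    if (s->block_size == 0) (*free_spans)++; else (*used)++;
    i += (s->slice_count > 0 ? s->slice_count : 1);   // jump to the next span
  }
}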
if (slice->block_size == 0 && segment->kind != MI_SEGMENT_HUGE) { - mi_segment_page_delete(slice, tld); + mi_segment_span_remove_from_queue(slice, tld); } page_count++; slice = slice + slice->slice_count; @@ -539,7 +606,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_internal(page_count == 2); // first page is allocated by the segment itself // stats - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + _mi_stat_decrease(&tld->stats->page_committed, mi_segment_info_size(segment)); if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache @@ -554,83 +621,24 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t Page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE); // find a free page size_t page_size = _mi_align_up(required,(required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; - mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again - if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM - return mi_segment_page_alloc(page_kind, required, tld, os_tld); + if (mi_segment_alloc(0, tld, os_tld, NULL) == NULL) return NULL; // OOM + return mi_segments_page_alloc(page_kind, required, tld, os_tld); } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); - - // set slice back pointers and commit/unreset - mi_segment_t* segment = _mi_page_segment(page); - mi_slice_t* slice = mi_page_to_slice(page); - bool commit = false; - bool unreset = false; - for (size_t i = 0; i < page->slice_count; i++, slice++) { - slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); - slice->block_size = 1; - if (i > 0) slice->slice_count = 0; - if (!segment->all_committed && !slice->is_committed) { - slice->is_committed = true; - commit = true; - } - if (slice->is_reset) { - slice->is_reset = false; - unreset = true; - } - } - uint8_t* page_start = mi_slice_start(mi_page_to_slice(page)); - if(commit) { _mi_os_commit(page_start, page_size, tld->stats); } - if(unreset){ _mi_os_unreset(page_start, page_size, tld->stats); } - - // initialize the page and return - mi_assert_internal(segment->thread_id == _mi_thread_id()); - segment->used++; + mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); return page; } -static mi_slice_t* mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(page != NULL && page->slice_count > 0 && page->slice_offset == 0 && page->block_size > 0); - mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(segment->used > 0); - segment->used--; - - // free and coalesce the page - mi_slice_t* slice = mi_page_to_slice(page); - size_t slice_count = slice->slice_count; - mi_slice_t* next = slice + slice->slice_count; - mi_assert_internal(next <= 
mi_segment_last_slice(segment) + 1); - if (next <= mi_segment_last_slice(segment) && next->block_size==0) { - // free next block -- remove it from free and merge - mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); - slice_count += next->slice_count; // extend - mi_segment_page_delete(next, tld); - } - if (slice > segment->slices) { - mi_slice_t* prev = mi_slice_first(slice - 1); - mi_assert_internal(prev >= segment->slices); - if (prev->block_size==0) { - // free previous slice -- remove it from free and merge - mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); - slice_count += prev->slice_count; - mi_segment_page_delete(prev, tld); - slice = prev; - } - } - - // and add the new free page - mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); - return slice; -} /* ----------------------------------------------------------- @@ -643,7 +651,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld mi_assert_internal(page->block_size > 0); mi_assert_internal(mi_page_all_free(page)); mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment->all_committed || page->is_committed); + size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -667,20 +675,13 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld page->block_size = 1; // and free it - if (segment->kind != MI_SEGMENT_HUGE) { - return mi_segment_page_free_coalesce(page, tld); - } - else { - mi_assert_internal(segment->used == 1); - segment->used--; - page->block_size = 0; // pretend free - return mi_page_to_slice(page); - } + return mi_segment_span_free_coalesce(mi_page_to_slice(page), tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) { mi_assert(page != NULL); + mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment,tld)); @@ -717,11 +718,12 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the free pages from our lists mi_slice_t* slice = &segment->slices[0]; - while (slice <= mi_segment_last_slice(segment)) { + const mi_slice_t* end = mi_segment_slices_end(segment); + while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); if (slice->block_size == 0) { // a free page - mi_segment_page_delete(slice,tld); + mi_segment_span_remove_from_queue(slice,tld); slice->block_size = 0; // but keep it free } slice = slice + slice->slice_count; @@ -729,8 +731,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); - mi_segments_track_size(-((long)segment->segment_size), tld); - + mi_segments_track_size(-((long)mi_segment_size(segment)), tld); segment->thread_id = 0; mi_segment_t* next; do { @@ -778,19 +779,19 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned_next = NULL; segment->thread_id = _mi_thread_id(); - mi_segments_track_size((long)segment->segment_size,tld); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_segments_track_size((long)mi_segment_size(segment),tld); + mi_assert_internal(segment->next == NULL); 
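/* ---------------------------------------------------------------------------
   Illustrative sketch (not the patch's own code; node_t and the function
   names are hypothetical): abandoned segments are kept on a global lock-free
   list that other threads can reclaim from, as the surrounding code does.
   The sketch shows the underlying compare-and-swap push/pop (a Treiber
   stack) using C11 atomics; a production pop would also need ABA protection
   (e.g. a tag counter), which is omitted here.
--------------------------------------------------------------------------- */
#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s {
  struct node_s* next;
} node_t;

static _Atomic(node_t*) abandoned_list = NULL;

// Push: retry until the head we read is still the current head.
static void abandoned_push(node_t* n) {
  node_t* head = atomic_load_explicit(&abandoned_list, memory_order_relaxed);
  do {
    n->next = head;
  } while (!atomic_compare_exchange_weak_explicit(
             &abandoned_list, &head, n,
             memory_order_release, memory_order_relaxed));
}

// Pop one node, or NULL if the list is empty.
static node_t* abandoned_pop(void) {
  node_t* head = atomic_load_explicit(&abandoned_list, memory_order_acquire);
  while (head != NULL &&
         !atomic_compare_exchange_weak_explicit(
            &abandoned_list, &head, head->next,
            memory_order_acquire, memory_order_relaxed)) {
    // CAS failed: `head` was reloaded, try again
  }
  return head;
}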
_mi_stat_decrease(&tld->stats->segments_abandoned,1); mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page slice = slice + slice->slice_count; // skip the first segment allocated page - while (slice <= mi_segment_last_slice(segment)) { + while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); - mi_page_t* page = mi_slice_to_page(slice); - if (page->block_size == 0) { // a free page, add it to our lists - mi_segment_page_add_free(page,tld); + if (slice->block_size == 0) { // a free page, add it to our lists + mi_segment_span_add_free(slice,tld); } slice = slice + slice->slice_count; } @@ -798,7 +799,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen slice = &segment->slices[0]; mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page slice = slice + slice->slice_count; // skip the first segment allocated page - while (slice <= mi_segment_last_slice(segment)) { + while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); mi_page_t* page = mi_slice_to_page(slice); @@ -837,27 +838,11 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld); - if (segment == NULL) return NULL; - mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); - segment->used = 1; - - mi_page_t* page = mi_slice_to_page(&segment->slices[0]); - mi_assert_internal(page->block_size > 0 && page->slice_count > 0); - size_t initial_count = page->slice_count; - page = page + initial_count; - page->slice_count = (uint32_t)((segment->segment_size - segment->segment_info_size)/MI_SEGMENT_SLICE_SIZE); - page->slice_offset = 0; - page->block_size = size; - mi_assert_internal(page->slice_count * MI_SEGMENT_SLICE_SIZE >= size); - mi_assert_internal(page->slice_count >= segment->slice_count - initial_count); - // set back pointers - for (size_t i = 1; i slice_count; i++) { - mi_slice_t* slice = (mi_slice_t*)(page + i); - slice->slice_offset = (uint32_t)(sizeof(mi_page_t)*i); - slice->block_size = 1; - slice->slice_count = 0; - } + mi_page_t* page = NULL; + mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); + if (segment == NULL || page==NULL) return NULL; + mi_assert_internal(segment->used==1); + mi_assert_internal(page->block_size >= size); return page; } @@ -874,13 +859,13 @@ static bool mi_is_good_fit(size_t bsize, size_t size) { mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {// || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { - page = mi_segment_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld); + page = mi_segments_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) {// || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { - page = mi_segment_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld); + page = mi_segments_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld); } else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { - page = mi_segment_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld); + page = 
mi_segments_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld); } else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); @@ -894,12 +879,12 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ The following functions are to reliably find the segment or block that encompasses any pointer p (or NULL if it is not in any of our segments). - We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (128mb) + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) set to 1 if it contains the segment meta data. ----------------------------------------------------------- */ #if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)1 << 44) // 16TB +#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB #else #define MI_MAX_ADDRESS ((size_t)1 << 31) // 2Gb #endif @@ -908,10 +893,10 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) -static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 1KiB per TB with 128MiB segments +static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB? + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; *bitidx = segindex % (8*MI_INTPTR_SIZE); return (segindex / (8*MI_INTPTR_SIZE)); @@ -948,13 +933,14 @@ static mi_segment_t* _mi_segment_of(const void* p) { mi_segment_t* segment = _mi_ptr_segment(p); size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); - // fast path: for any pointer to valid small/medium/large object or first 4MiB in huge + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge if (mi_likely((mi_segment_map[index] & ((uintptr_t)1 << bitidx)) != 0)) { return segment; // yes, allocated by us } if (index==0) return NULL; // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in 256MiB steps trough valid huge objects + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (4GiB) steps trough + // valid huge objects // note: we could maintain a lowest index to speed up the path for invalid pointers? 
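/* ---------------------------------------------------------------------------
   Illustrative sketch (not the patch's own code; SEG_SHIFT, MAX_ADDRESS and
   the map_* names are hypothetical, and a 64-bit address space is assumed):
   the segment map above keeps one bit per MI_SEGMENT_SIZE-aligned address
   range so a pointer can be checked for "belongs to one of our segments".
   The sketch shows the word/bit index arithmetic for such a bitmap; the
   real code updates the map word atomically.
--------------------------------------------------------------------------- */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define SEG_SHIFT    26                                   // 64 MiB segments
#define SEG_SIZE     ((uintptr_t)1 << SEG_SHIFT)
#define MAX_ADDRESS  ((uintptr_t)20 << 40)                // cover 20 TiB
#define MAP_WORDS    ((MAX_ADDRESS / SEG_SIZE) / (8 * sizeof(uintptr_t)))

static uintptr_t segment_map[MAP_WORDS];                  // 1 bit per segment

// Locate the map word and bit for the segment that contains `p`.
static size_t map_index_of(const void* p, size_t* bitidx) {
  uintptr_t segidx = ((uintptr_t)p % MAX_ADDRESS) >> SEG_SHIFT;
  *bitidx = segidx % (8 * sizeof(uintptr_t));
  return segidx / (8 * sizeof(uintptr_t));
}

// Mark a segment's start address as allocated by us.
static void map_mark_allocated(const void* segment_start) {
  size_t bitidx;
  size_t idx = map_index_of(segment_start, &bitidx);
  segment_map[idx] |= ((uintptr_t)1 << bitidx);
}

// Fast check whether `p` falls in a segment we allocated.
static bool map_contains(const void* p) {
  size_t bitidx;
  size_t idx = map_index_of(p, &bitidx);
  return (segment_map[idx] & ((uintptr_t)1 << bitidx)) != 0;
}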
size_t lobitidx; size_t loindex; @@ -978,8 +964,8 @@ static mi_segment_t* _mi_segment_of(const void* p) { bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(cookie_ok); if (mi_unlikely(!cookie_ok)) return NULL; - if (((uint8_t*)segment + segment->segment_size) <= (uint8_t*)p) return NULL; // outside the range - mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + segment->segment_size); + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); return segment; } diff --git a/test/test-stress.c b/test/test-stress.c index ad487538..a4f223e2 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -63,7 +63,11 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1,r)) items *= 1000; // 0.01% giant + else if (chance(10,r)) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; From 6f5492cef801badbfb8cb7a2acfdbd5295590f22 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 24 Aug 2019 15:00:55 -0700 Subject: [PATCH 009/352] enable initial lazy commit and optional decommit to reduce commit charge with many threads --- include/mimalloc-internal.h | 5 ++ include/mimalloc-types.h | 6 +- include/mimalloc.h | 6 +- src/options.c | 6 +- src/os.c | 14 ++-- src/segment.c | 134 +++++++++++++++++++++++++++--------- 6 files changed, 125 insertions(+), 46 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bf5c2e04..ce9f6d07 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -191,6 +191,11 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { } } +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + return (sz / alignment) * alignment; +} + + // Align a byte size to a size in _machine words_, // i.e. byte size == `wsize*sizeof(void*)`. static inline size_t _mi_wsize_from_size(size_t size) { diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 78b643ad..81ac9d54 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -84,7 +84,7 @@ terms of the MIT license. A copy of the license can be found in the file // Derived constants #define MI_SEGMENT_SIZE ((size_t)1< MI_LARGE_SIZE_MAX segment with just one huge page inside. 
} mi_segment_kind_t; -#define MI_COMMIT_SIZE ((size_t)2 << 20) // OS large page size +#define MI_COMMIT_SIZE (2UL<<20) // OS large page size -#if ((MI_SEGMENT_SIZE / MI_COMMIT_SIZE) > MI_INTPTR_SIZE) +#if ((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE > 8*MI_INTPTR_SIZE) #error "not enough commit bits to cover the segment size" #endif diff --git a/include/mimalloc.h b/include/mimalloc.h index 7000cd42..c1a3bbe6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -225,9 +225,9 @@ typedef enum mi_option_e { mi_option_verbose, // the following options are experimental mi_option_secure, - mi_option_eager_commit, - mi_option_eager_region_commit, - mi_option_large_os_pages, // implies eager commit + mi_option_lazy_commit, + mi_option_decommit, + mi_option_large_os_pages, mi_option_reserve_huge_os_pages, mi_option_page_reset, mi_option_cache_reset, diff --git a/src/options.c b/src/options.c index ff65c3f5..c56b6bd7 100644 --- a/src/options.c +++ b/src/options.c @@ -55,12 +55,12 @@ static mi_option_desc_t options[_mi_option_last] = #endif // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // note: if eager_region_commit is on, this should be on too. #ifdef _WIN32 // and BSD? - { 1, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) + { 1, UNINIT, MI_OPTION(lazy_commit) }, #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 0, UNINIT, MI_OPTION(lazy_commit) }, #endif + { 0, UNINIT, MI_OPTION(decommit) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(page_reset) }, diff --git a/src/os.c b/src/os.c index a1b6cdf3..9ad595d5 100644 --- a/src/os.c +++ b/src/os.c @@ -44,10 +44,6 @@ static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); } -static uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { - return (sz / alignment) * alignment; -} - static void* mi_align_down_ptr(void* p, size_t alignment) { return (void*)_mi_align_down((uintptr_t)p, alignment); } @@ -195,10 +191,14 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats) } #ifdef _WIN32 + +#define MEM_COMMIT_RESERVE (MEM_COMMIT|MEM_RESERVE) + static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if ((size % (uintptr_t)1 << 30) == 0 /* 1GiB multiple */ + if ((flags&MEM_COMMIT_RESERVE)==MEM_COMMIT_RESERVE + && (size % (uintptr_t)1 << 30) == 0 /* 1GiB multiple */ && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0) && pNtAllocateVirtualMemoryEx != NULL) @@ -250,7 +250,9 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only) { static volatile uintptr_t large_page_try_ok = 0; void* p = NULL; - if (large_only || use_large_os_page(size, try_alignment)) { + if ((flags&MEM_COMMIT_RESERVE) == MEM_COMMIT_RESERVE + && (large_only || use_large_os_page(size, try_alignment))) + { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the 
calls to VirtualAlloc get very expensive. diff --git a/src/segment.c b/src/segment.c index 3b3272a1..54de294f 100644 --- a/src/segment.c +++ b/src/segment.c @@ -39,7 +39,7 @@ static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) { return &segment->slices[segment->slice_entries]; } -/* + static uint8_t* mi_slice_start(const mi_slice_t* slice) { mi_segment_t* segment = _mi_ptr_segment(slice); mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment)); @@ -47,11 +47,6 @@ static uint8_t* mi_slice_start(const mi_slice_t* slice) { } -static size_t mi_slices_in(size_t size) { - return (size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; -} -*/ - /* ----------------------------------------------------------- Bins ----------------------------------------------------------- */ @@ -359,6 +354,68 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Span management ----------------------------------------------------------- */ +static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size) { + mi_assert_internal(_mi_ptr_segment(p) == segment); + if (size == 0 || size > MI_SEGMENT_SIZE) return 0; + if (p >= (uint8_t*)segment + mi_segment_size(segment)) return 0; + + uintptr_t diff = (p - (uint8_t*)segment); + uintptr_t start; + uintptr_t end; + if (conservative) { + start = _mi_align_up(diff, MI_COMMIT_SIZE); + end = _mi_align_down(diff + size, MI_COMMIT_SIZE); + } + else { + start = _mi_align_down(diff, MI_COMMIT_SIZE); + end = _mi_align_up(diff + size, MI_COMMIT_SIZE); + } + mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); + *start_p = (uint8_t*)segment + start; + *full_size = (end > start ? 
end - start : 0); + + uintptr_t bitidx = start / MI_COMMIT_SIZE; + mi_assert_internal(bitidx < (MI_INTPTR_SIZE*8)); + + uintptr_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 + if (bitidx + bitcount > MI_INTPTR_SIZE*8) { + _mi_warning_message("%zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); + } + mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); + + uintptr_t mask = (((uintptr_t)1 << bitcount) - 1) << bitidx; + + return mask; +} + +static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { + // commit liberal, but decommit conservative + uint8_t* start; + size_t full_size; + uintptr_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); + if (mask==0 || full_size==0) return; + + if (commit && (segment->commit_mask & mask) != mask) { + _mi_os_commit(start,full_size,stats); + segment->commit_mask |= mask; + } + else if (!commit && (segment->commit_mask & mask) != 0) { + _mi_os_decommit(start, full_size,stats); + segment->commit_mask &= ~mask; + } +} + +static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (~segment->commit_mask == 0) return; // fully committed + mi_segment_commitx(segment,true,p,size,stats); +} + +static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (!segment->allow_decommit || !mi_option_is_enabled(mi_option_decommit)) return; + if (segment->commit_mask == 1) return; // fully decommitted + mi_segment_commitx(segment, false, p, size, stats); +} + static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE ? 
NULL : mi_span_queue_for(slice_count,tld)); @@ -376,6 +433,10 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); last->block_size = 0; } + + // perhaps decommit + mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); + // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); else slice->block_size = 0; // mark huge page as free anyways @@ -452,7 +513,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz } -static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count) { +static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_slice_t* slice = &segment->slices[slice_index]; mi_assert_internal(slice->block_size==0 || slice->block_size==1); @@ -481,6 +542,8 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i last->block_size = 1; } + // ensure the memory is committed + mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); segment->used++; return page; } @@ -500,7 +563,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm mi_segment_slice_split(segment, slice, slice_count, tld); } mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->block_size > 0); - return mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count); + return mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); } } sq++; @@ -524,49 +587,58 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? 
MI_SLICES_PER_SEGMENT : segment_slices); size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; - // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) - || required > 0; // huge page + // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) + size_t lazy = (size_t)mi_option_get(mi_option_lazy_commit); + bool commit_lazy = (lazy > tld->count) && required == 0; // lazy, and not a huge page + + // Try to get from our cache first mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); if (segment==NULL) { // Allocate the segment from the OS - segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, /* &memid,*/ os_tld); + segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, !commit_lazy, /* &memid,*/ os_tld); if (segment == NULL) return NULL; // failed to allocate - if (!commit) { - _mi_os_commit(segment, info_slices*MI_SEGMENT_SLICE_SIZE, tld->stats); + mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); + if (commit_lazy) { + // at least commit the info slices + mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); + _mi_os_commit(segment, MI_COMMIT_SIZE, tld->stats); } mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } - mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); // zero the segment info? -- not needed as it is zero initialized from the OS // memset(segment, 0, info_size); - if (mi_option_is_enabled(mi_option_secure)) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - size_t os_page_size = _mi_os_page_size(); - size_t info_size = (info_slices * MI_SEGMENT_SLICE_SIZE); - mi_assert_internal(info_size - os_page_size >= pre_size); - _mi_os_protect((uint8_t*)segment + info_size - os_page_size, os_page_size); - // and protect the last page too - _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); - if (slice_entries == segment_slices) slice_entries--; // don't use the last slice :-( - } - + // initialize segment info + memset(segment,0,offsetof(mi_segment_t,slices)); segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); segment->slice_entries = slice_entries; - segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); + segment->allow_decommit = commit_lazy; + segment->commit_mask = (commit_lazy ? 
0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed + memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); + // set up guard pages + if (mi_option_is_enabled(mi_option_secure)) { + // in secure mode, we set up a protected page in between the segment info + // and the page data + size_t os_page_size = _mi_os_page_size(); + mi_assert_internal(mi_segment_info_size(segment) - os_page_size >= pre_size); + _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_page_size, os_page_size); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_page_size; + mi_segment_ensure_committed(segment, end, os_page_size, tld->stats); + _mi_os_protect(end, os_page_size); + if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( + } + // reserve first slices for segment info - mi_segment_span_allocate(segment,0,info_slices); + mi_segment_span_allocate(segment, 0, info_slices, tld); mi_assert_internal(segment->used == 1); segment->used = 0; // don't count our internal slices towards usage @@ -577,7 +649,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m } else { mi_assert_internal(huge_page!=NULL); - *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices); + *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld); } return segment; @@ -886,7 +958,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ #if (MI_INTPTR_SIZE==8) #define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB #else -#define MI_MAX_ADDRESS ((size_t)1 << 31) // 2Gb +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb #endif #define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) From 80a36f1d7cdfdd2371213720d8312b4323b2f83a Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 24 Aug 2019 17:02:32 -0700 Subject: [PATCH 010/352] reduce page retire words to 32 --- src/page.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index 313bb66a..68312f81 100644 --- a/src/page.c +++ b/src/page.c @@ -387,7 +387,7 @@ void _mi_page_retire(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_all_free(page)); - + mi_page_set_has_aligned(page, false); // don't retire too often.. @@ -396,13 +396,13 @@ void _mi_page_retire(mi_page_t* page) { // is the only page left with free blocks. It is not clear // how to check this efficiently though... for now we just check // if its neighbours are almost fully used. - if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(page->block_size <= 32*MI_INTPTR_SIZE)) { if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1); return; // dont't retire after all } } - + _mi_page_free(page, mi_page_queue_of(page), false); } From c7ec30ae25178e7386fa3e202557a9f14a0ffbc0 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 30 Oct 2019 15:36:13 -0700 Subject: [PATCH 011/352] fix secure mode --- include/mimalloc-types.h | 2 +- src/segment.c | 4 +++- test/test-stress.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 91a68247..b77d77d3 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode diff --git a/src/segment.c b/src/segment.c index b8db8460..7dcfcd36 100644 --- a/src/segment.c +++ b/src/segment.c @@ -226,12 +226,13 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa mi_assert_internal((uintptr_t)p % _mi_os_page_size() == 0); } */ - + /* TODO: guard pages between every slice span if (MI_SECURE > 1 || (MI_SECURE == 1 && slice == &segment->slices[segment->slice_entries - 1])) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page psize -= _mi_os_page_size(); } + */ if (page_size != NULL) *page_size = psize; mi_assert_internal(_mi_ptr_page(p) == page); @@ -708,6 +709,7 @@ static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t requir // find a free page size_t page_size = _mi_align_up(required,(required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; + mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again diff --git a/test/test-stress.c b/test/test-stress.c index e3b0f7a3..08406ec7 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -17,7 +17,7 @@ terms of the MIT license. #include // argument defaults -static int THREADS = 1; // more repeatable if THREADS <= #processors +static int THREADS = 32; // more repeatable if THREADS <= #processors static int N = 20; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors From f7d2c45af3700aa045d99e76b98139099aa4691e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 00:40:41 -0700 Subject: [PATCH 012/352] initial experiment with fixed memory arena and sliced segments --- CMakeLists.txt | 1 + ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj | 1 + include/mimalloc-internal.h | 6 + include/mimalloc-types.h | 8 +- src/memory.c | 551 ------------------- src/os.c | 4 +- src/segment.c | 21 +- test/test-stress.c | 2 +- 13 files changed, 39 insertions(+), 564 deletions(-) delete mode 100644 src/memory.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 84668eb3..f8836f20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c + src/arena.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 35f84a4b..458d5e70 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index f7ea6d4c..64bb3dbd 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -67,5 +67,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj 
b/ide/vs2017/mimalloc.vcxproj index 415f87fc..219449c9 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 0e5512bc..87f7e9e1 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -50,6 +50,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 3a9cdcae..ac559468 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 2af40f16..f38a7a11 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f4822b10..f5b11c33 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -65,6 +65,12 @@ bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + +// arena.c +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); /* // memory.c diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b77d77d3..b043bebe 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -239,12 +239,14 @@ typedef mi_page_t mi_slice_t; // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - struct mi_segment_s* next; // the list of freed segments in the cache - volatile _Atomic(struct mi_segment_s*) abandoned_next; - + size_t memid; // memory id for arena allocation bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed + // from here is zero initialized + struct mi_segment_s* next; // the list of freed segments in the cache + volatile _Atomic(struct mi_segment_s*) abandoned_next; + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t used; // count of pages in use uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` diff --git a/src/memory.c b/src/memory.c deleted file mode 100644 index 80351edc..00000000 --- a/src/memory.c +++ /dev/null @@ -1,551 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses in that - object :-( (`malloc-large` tests this). This means we need a cheaper way to - reuse memory. -3. This layer can help with a NUMA aware allocation in the future. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset - -// Internal raw OS interface -size_t _mi_os_large_page_size(); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -bool _mi_os_is_huge_reserved(void* p); - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE - -#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8) -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS) -#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) -#define MI_REGION_MAP_FULL UINTPTR_MAX - - -typedef uintptr_t mi_region_info_t; - -static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { - return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); -} - 
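/* ---------------------------------------------------------------------------
   Illustrative sketch (not the patch's own code; region_info_pack/unpack are
   hypothetical names): the removed region code around this hunk packs a
   start pointer and two flag bits into a single word so the whole triple can
   be read and written with one atomic access.  This relies on the pointer
   being aligned to at least 4 bytes, which leaves the low two bits free.
--------------------------------------------------------------------------- */
#include <stdbool.h>
#include <stdint.h>

typedef uintptr_t region_info_t;

static region_info_t region_info_pack(void* start, bool is_large, bool is_committed) {
  // the low two bits carry the flags; `start` must be at least 4-byte aligned
  return ((uintptr_t)start
          | ((uintptr_t)(is_large ? 1 : 0) << 1)
          | (uintptr_t)(is_committed ? 1 : 0));
}

static void* region_info_unpack(region_info_t info, bool* is_large, bool* is_committed) {
  if (is_large)     *is_large     = ((info & 0x02) != 0);
  if (is_committed) *is_committed = ((info & 0x01) != 0);
  return (void*)(info & ~(uintptr_t)0x03);
}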
-static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { - if (is_large) *is_large = ((info&0x02) != 0); - if (is_committed) *is_committed = ((info&0x01) != 0); - return (void*)(info & ~0x03); -} - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block - volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags - volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd -} mem_region_t; - - -// The region map; 16KiB for a 256GiB HEAP_REGION_MAX -// TODO: in the future, maintain a map per NUMA node for numa aware allocation -static mem_region_t regions[MI_REGION_MAX]; - -static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); - return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; -} - -// The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) { - mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS); - return ((((uintptr_t)1 << blocks) - 1) << bitidx); -} - -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} - -// Return if a pointer points into a region reserved by us. -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_read_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(®ions[i].info), NULL, NULL); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -/* ---------------------------------------------------------------------------- -Commit from a region ------------------------------------------------------------------------------*/ - -// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, - size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - size_t mask = mi_region_block_mask(blocks,bitidx); - mi_assert_internal(mask != 0); - mi_assert_internal((mask & mi_atomic_read_relaxed(®ion->map)) == mask); - mi_assert_internal(®ions[idx] == region); - - // ensure the region is reserved - mi_region_info_t info = mi_atomic_read(®ion->info); - if (info == 0) - { - bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = *allow_large; - void* start = NULL; - if (region_large) { - start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); - if (start != NULL) { region_commit = true; } - } - if (start == NULL) { - start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); - } - mi_assert_internal(!(region_large && !*allow_large)); - - if (start == NULL) { - // failure to allocate from the OS! unclaim the blocks and fail - size_t map; - do { - map = mi_atomic_read_relaxed(®ion->map); - } while (!mi_atomic_cas_weak(®ion->map, map & ~mask, map)); - return false; - } - - // set the newly allocated region - info = mi_region_info_create(start,region_large,region_commit); - if (mi_atomic_cas_strong(®ion->info, info, 0)) { - // update the region count - mi_atomic_increment(®ions_count); - } - else { - // failed, another thread allocated just before us! - // we assign it to a later slot instead (up to 4 tries). - for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { - if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { - mi_atomic_increment(®ions_count); - start = NULL; - break; - } - } - if (start != NULL) { - // free it if we didn't succeed to save it to some other region - _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); - } - // and continue with the memory at our index - info = mi_atomic_read(®ion->info); - } - } - mi_assert_internal(info == mi_atomic_read(®ion->info)); - mi_assert_internal(info != 0); - - // Commit the blocks to memory - bool region_is_committed = false; - bool region_is_large = false; - void* start = mi_region_info_read(info,®ion_is_large,®ion_is_committed); - mi_assert_internal(!(region_is_large && !*allow_large)); - mi_assert_internal(start!=NULL); - - // set dirty bits - uintptr_t m; - do { - m = mi_atomic_read(®ion->dirty_mask); - } while (!mi_atomic_cas_weak(®ion->dirty_mask, m | mask, m)); - *is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range? 
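// Note (sketch): the do/while above is an atomic fetch-or implemented as a CAS loop; `m` holds the
// value of `dirty_mask` from just before our bits were set, which is what makes the `is_zero` test
// valid. With C11 atomics the same step could be written roughly as (assuming the field is usable
// as a plain C11 atomic on this platform):
//
//   uintptr_t m = atomic_fetch_or(&region->dirty_mask, mask);
//   *is_zero = ((m & mask) == 0);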
- - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed) { - // ensure commit - bool commit_zero = false; - _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages) - if (commit_zero) *is_zero = true; - } - else if (!*commit && region_is_committed) { - // but even when no commit is requested, we might have committed anyway (in a huge OS page for example) - *commit = true; - } - - // and return the allocation - mi_assert_internal(blocks_start != NULL); - *allow_large = region_is_large; - *p = blocks_start; - *id = (idx*MI_REGION_MAP_BITS) + bitidx; - return true; -} - -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanForward64(&idx, x); - #else - _BitScanForward(&idx, x); - #endif - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanReverse64(&idx, x); - #else - _BitScanReverse(&idx, x); - #endif - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); -} -#endif - -// Allocate `blocks` in a `region` at `idx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(p != NULL && id != NULL); - mi_assert_internal(blocks < MI_REGION_MAP_BITS); - - const uintptr_t mask = mi_region_block_mask(blocks, 0); - const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; - uintptr_t map = mi_atomic_read(®ion->map); - if (map==MI_REGION_MAP_FULL) return true; - - #ifdef MI_HAVE_BITSCAN - size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible - #else - size_t bitidx = 0; // otherwise start at 0 - #endif - uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx - - // scan linearly for a free range of zero bits - while(bitidx <= bitidx_max) { - if ((map & m) == 0) { // are the mask bits free at bitidx? - mi_assert_internal((m >> bitidx) == mask); // no overflow? - uintptr_t newmap = map | m; - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(®ion->map, newmap, map)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going - map = mi_atomic_read(®ion->map); - continue; - } - else { - // success, we claimed the bits - // now commit the block memory -- this can still fail - return mi_region_commit_blocks(region, idx, bitidx, blocks, - size, commit, allow_large, is_zero, p, id, tld); - } - } - else { - // on to the next bit range - #ifdef MI_HAVE_BITSCAN - size_t shift = (blocks == 1 ? 
1 : mi_bsr(map & m) - bitidx + 1); - mi_assert_internal(shift > 0 && shift <= blocks); - #else - size_t shift = 1; - #endif - bitidx += shift; - m <<= shift; - } - } - // no error, but also no bits found - return true; -} - -// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - // check if there are available blocks in the region.. - mi_assert_internal(idx < MI_REGION_MAX); - mem_region_t* region = ®ions[idx]; - uintptr_t m = mi_atomic_read_relaxed(®ion->map); - if (m != MI_REGION_MAP_FULL) { // some bits are zero - bool ok = (*commit || *allow_large); // committing or allow-large is always ok - if (!ok) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info,&is_large,&is_committed); - ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? - } - if (ok) { - return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } - } - return true; // no error, but no success either -} - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. -// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, - size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(id != NULL && tld != NULL); - mi_assert_internal(size > 0); - *id = SIZE_MAX; - *is_zero = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - - // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) - if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - *is_zero = true; - return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size - } - - // always round size to OS page size multiple (so commit/decommit go over the entire range) - // TODO: use large OS page size here? - size = _mi_align_up(size, _mi_os_page_size()); - - // calculate the number of needed blocks - size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); - - // find a range of free blocks - void* p = NULL; - size_t count = mi_atomic_read(®ions_count); - size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? 
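// The search below is first-fit over the regions allocated so far: start at the per-thread
// `region_idx` (to reduce contention between threads), wrap around once over the `count`
// existing regions, then try to extend into at most 8 fresh region slots, and only after
// that fall back to a direct OS allocation.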
- for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - - if (p == NULL) { - // no free range in existing regions -- try to extend beyond the count.. but at most 8 regions - for (idx = count; idx < mi_atomic_read_relaxed(®ions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - } - - if (p == NULL) { - // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - *is_zero = true; - p = _mi_os_alloc_aligned(size, alignment, commit, large, tld); - } - else { - tld->region_idx = idx; // next start of search? currently not used as we use first-fit - } - - mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); - if (p==NULL) return; - if (size==0) return; - if (id == SIZE_MAX) { - // was a direct OS allocation, pass through - _mi_os_free(p, size, stats); - } - else { - // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; - // we can align the size up to page size (as we allocate that way too) - // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - size_t idx = (id / MI_REGION_MAP_BITS); - size_t bitidx = (id % MI_REGION_MAP_BITS); - size_t blocks = mi_region_block_count(size); - size_t mask = mi_region_block_mask(blocks, bitidx); - mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? - mem_region_t* region = ®ions[idx]; - mi_assert_internal((mi_atomic_read_relaxed(®ion->map) & mask) == mask ); // claimed? - mi_region_info_t info = mi_atomic_read(®ion->info); - bool is_large; - bool is_eager_committed; - void* start = mi_region_info_read(info,&is_large,&is_eager_committed); - mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); - if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? - - // decommit (or reset) the blocks to reduce the working set. - // TODO: implement delayed decommit/reset as these calls are too expensive - // if the memory is reused soon. - // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large) { - if (mi_option_is_enabled(mi_option_segment_reset)) { - if (!is_eager_committed && // cannot reset large pages - (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead - mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments - { - _mi_os_reset(p, size, stats); - //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? 
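// (Note on the todo above: a decommit would justify clearing the dirty bits, since on most
//  platforms decommitted pages read back as zero once they are committed again, whereas a
//  plain reset gives no such guarantee, so after a reset the dirty bits must stay set.)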
- } - } -<<<<<<< HEAD - // else { _mi_os_reset(p,size,stats); } - } -======= - } ->>>>>>> dev - if (!is_eager_committed) { - // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); - } - - // TODO: should we free empty regions? currently only done _mi_mem_collect. - // this frees up virtual address space which might be useful on 32-bit systems? - - // and unclaim - uintptr_t map; - uintptr_t newmap; - do { - map = mi_atomic_read_relaxed(®ion->map); - newmap = map & ~mask; - } while (!mi_atomic_cas_weak(®ion->map, newmap, map)); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_stats_t* stats) { - // free every region that has no segments in use. - for (size_t i = 0; i < regions_count; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_read_relaxed(®ion->map) == 0) { - // if no segments used, try to claim the whole region - uintptr_t m; - do { - m = mi_atomic_read_relaxed(®ion->map); - } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); - if (m == 0) { - // on success, free the whole region (unless it was huge reserved) - bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); - if (start != NULL && !_mi_os_is_huge_reserved(start)) { - _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats); - } - // and release - mi_atomic_write(®ion->info,0); - mi_atomic_write(®ion->map,0); - } - } - } -} - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_commit(p, size, is_zero, stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_decommit(p, size, stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_reset(p, size, stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_unreset(p, size, is_zero, stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} diff --git a/src/os.c b/src/os.c index 5e595f93..191be56c 100644 --- a/src/os.c +++ b/src/os.c @@ -868,13 +868,13 @@ static void mi_os_free_huge_reserved() { */ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { UNUSED(pages); UNUSED(max_secs); if (pages_reserved != NULL) *pages_reserved = 0; return ENOMEM; } #else -int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept { if (pages_reserved != NULL) *pages_reserved = 0; if (max_secs==0) return ETIMEDOUT; // timeout diff --git a/src/segment.c b/src/segment.c index 7dcfcd36..8a02acac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -284,7 +284,8 @@ static void mi_segment_os_free(mi_segment_t* segment, 
mi_segments_tld_t* tld) { if (MI_SECURE>0) { _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } - _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); + // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, tld->stats); } @@ -598,29 +599,35 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m // Try to get from our cache first mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); + bool is_zero = false; if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld); + size_t memid = 0; + // segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld); + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); if (!commit) { // at least commit the info slices mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); - bool is_zero = false; _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); } + segment->memid = memid; segment->mem_is_fixed = mem_large; segment->mem_is_committed = commit; mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } - // zero the segment info? -- not needed as it is zero initialized from the OS - // memset(segment, 0, info_size); + // zero the segment info? -- not always needed as it is zero initialized from the OS + if (!is_zero) { + ptrdiff_t ofs = offsetof(mi_segment_t, next); + size_t prefix = offsetof(mi_segment_t, slices) - ofs; + memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); + } // initialize segment info - memset(segment,0,offsetof(mi_segment_t,slices)); segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; segment->thread_id = _mi_thread_id(); @@ -629,7 +636,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); segment->allow_decommit = !commit; segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed - memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); + // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); // set up guard pages diff --git a/test/test-stress.c b/test/test-stress.c index 08406ec7..f60cda10 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,7 @@ terms of the MIT license. 
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 40; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From 28cb19148c112cbfbcf3a768a18b3b4cb5a3301c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 09:10:58 -0700 Subject: [PATCH 013/352] fixed memory arena allocation for huge pages --- ide/vs2019/mimalloc.vcxproj | 2 +- src/arena.c | 366 ++++++++++++++++++++++++++++++++++++ 2 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 src/arena.c diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index f38a7a11..6cfd76fa 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=1;%(PreprocessorDefinitions); + MI_DEBUG=2;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 diff --git a/src/arena.c b/src/arena.c new file mode 100644 index 00000000..b25732d7 --- /dev/null +++ b/src/arena.c @@ -0,0 +1,366 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_SIZE +#define MI_MAX_ARENAS (64) + +// Block info: bit 0 contains the `in_use` bit, the upper bits the +// size in count of arena blocks. +typedef uintptr_t mi_block_info_t; + +// A memory arena descriptor +typedef struct mi_arena_s { + uint8_t* start; // the start of the memory area + size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + bool is_zero_init; // is the arena zero initialized? + bool is_large; // large OS page allocated + _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks + _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's +} mi_arena_t; + + +// The available arenas +static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; +static _Atomic(uintptr_t) mi_arena_count; // = 0 + + +/* ----------------------------------------------------------- + Arena allocations get a memory id where the lower 8 bits are + the arena index +1, and the upper bits the block index. +----------------------------------------------------------- */ + +// Use `SIZE_MAX` as a special id for direct OS allocated memory. 
+#define MI_MEMID_OS (SIZE_MAX) + +static size_t mi_memid_create(size_t arena_index, size_t block_index) { + mi_assert_internal(arena_index < 0xFE); + return ((block_index << 8) | ((arena_index+1) & 0xFF)); +} + +static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { + mi_assert_internal(memid != MI_MEMID_OS); + mi_assert_internal(memid != 0); + *arena_index = (memid & 0xFF) - 1; + *block_index = (memid >> 8); +} + +/* ----------------------------------------------------------- + Block info +----------------------------------------------------------- */ + +static bool mi_block_is_in_use(mi_block_info_t info) { + return ((info&1) != 0); +} + +static size_t mi_block_count(mi_block_info_t info) { + return (info>>1); +} + +static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { + return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0)); +} + + +/* ----------------------------------------------------------- + Thread safe allocation in an arena +----------------------------------------------------------- */ + +static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + // Scan linearly through all block info's + // Skipping used ranges, coalescing free ranges on demand. + mi_assert_internal(needed_bcount > 0); + mi_assert_internal(start_idx <= arena->block_count); + mi_assert_internal(end_idx <= arena->block_count); + _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; + _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; + while (block < end) { + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + size_t bcount = mi_block_count(binfo); + if (mi_block_is_in_use(binfo)) { + // in-use, skip ahead + mi_assert_internal(bcount > 0); + block += bcount; + } + else { + // free blocks + if (bcount==0) { + // optimization: + // use 0 initialized blocks at the end, to use single atomic operation + // initially to reduce contention (as we don't need to split) + if (block + needed_bcount > end) { + return NULL; // does not fit + } + else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. + continue; + } + else { + // we got it: return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = arena->is_zero_init; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + + mi_assert_internal(bcount>0); + if (needed_bcount > bcount) { +#if 0 // MI_NO_ARENA_COALESCE + block += bcount; // too small, skip to the next range + continue; +#else + // too small, try to coalesce + _Atomic(mi_block_info_t)* block_next = block + bcount; + if (block_next >= end) { + return NULL; // does not fit + } + mi_block_info_t binfo_next = mi_atomic_read(block_next); + size_t bcount_next = mi_block_count(binfo_next); + if (mi_block_is_in_use(binfo_next)) { + // next block is in use, cannot coalesce + block += (bcount + bcount_next); // skip ahea over both blocks + } + else { + // next block is free, try to coalesce + // first set the next one to being used to prevent dangling ranges + if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { + // someone else got in before us.. 
try again + continue; + } + else { + if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance + // someone claimed/coalesced the block in the meantime + // first free the next block again.. + bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong + mi_assert(ok); UNUSED(ok); + // and try again + continue; + } + else { + // coalesced! try again + // todo: we could optimize here to immediately claim the block if the + // coalesced size is a fit instead of retrying. Keep it simple for now. + continue; + } + } + } +#endif + } + else { // needed_bcount <= bcount + mi_assert_internal(needed_bcount <= bcount); + // it fits, claim the whole block + if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. + continue; + } + else { + // got it, now split off the needed part + if (needed_bcount < bcount) { + mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); + mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); + } + // return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = false; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + } + } + // no success + return NULL; +} + +// Try to reduce search time by starting from bottom and wrap around. +static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); + void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); + if (p == NULL && bottom > 0) { + // try again from the start + p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); + } + if (p != NULL) { + mi_atomic_write(&arena->block_bottom, *block_index); + } + return p; +} + +/* ----------------------------------------------------------- + Arena Allocation +----------------------------------------------------------- */ + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = MI_MEMID_OS; + *is_zero = false; + bool default_large = false; + if (large==NULL) large = &default_large; // ensure `large != NULL` + + // try to allocate in an arena if the alignment is small enough + // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. + if (alignment <= MI_SEGMENT_ALIGN && + size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 48MiB (not more than 25% waste) + !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! 
<64MiB - 96MiB> + ) + { + size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); + size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + if (arena==NULL) break; + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); + #if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + #endif + *memid = mi_memid_create(i, block_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + mi_assert_internal((uintptr_t)p % alignment == 0); + return p; + } + } + } + + // fall back to the OS + *is_zero = true; + *memid = MI_MEMID_OS; + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); +} + +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); +} + +/* ----------------------------------------------------------- + Arena free +----------------------------------------------------------- */ + +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); + if (p==NULL) return; + if (size==0) return; + if (memid == MI_MEMID_OS) { + // was a direct OS allocation, pass through + _mi_os_free(p, size, stats); + } + else { + mi_assert_internal(memid != 0); + // allocated in an arena + size_t arena_idx; + size_t block_idx; + mi_memid_indices(memid, &arena_idx, &block_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); + mi_assert_internal(arena != NULL); + if (arena == NULL) { + _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + mi_assert_internal(arena->block_count > block_idx); + if (arena->block_count <= block_idx) { + _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + if (!mi_block_is_in_use(binfo)) { + _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); + return; + }; + bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); + mi_assert_internal(ok); + if (!ok) { + _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); + } + if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { + mi_atomic_write(&arena->block_bottom, block_idx); + } + } +} + +/* ----------------------------------------------------------- + Add an arena. 
+----------------------------------------------------------- */ + +static bool mi_arena_add(mi_arena_t* arena) { + mi_assert_internal(arena != NULL); + mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); + mi_assert_internal(arena->block_count > 0); + mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); + + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); + if (i >= MI_MAX_ARENAS) { + mi_atomic_subu(&mi_arena_count, 1); + return false; + } + mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena); + return true; +} + + +/* ----------------------------------------------------------- + Reserve a huge page arena. + TODO: improve OS api to just reserve and claim a huge + page area at once, (and return the total size). +----------------------------------------------------------- */ + +#include + +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + size_t pages_reserved_default = 0; + if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; + int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved); + if (*pages_reserved==0) return err; + size_t hsize = (*pages_reserved) * GiB; + void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN); + mi_assert_internal(p != NULL); + if (p == NULL) return ENOMEM; + size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; + size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats); + if (arena == NULL) return ENOMEM; + arena->block_count = bcount; + arena->start = (uint8_t*)p; + arena->block_bottom = 0; + arena->is_large = true; + arena->is_zero_init = true; + memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false)); + mi_arena_add(arena); + return 0; +} \ No newline at end of file From ed4f60fc7e5bfb17e0e7b4cc6bdd6e7102637d16 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 10:59:40 -0700 Subject: [PATCH 014/352] respect large pages for arena allocation --- src/arena.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/arena.c b/src/arena.c index b25732d7..63f08737 100644 --- a/src/arena.c +++ b/src/arena.c @@ -232,21 +232,23 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* for (size_t i = 0; i < MI_MAX_ARENAS; i++) { mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); if (arena==NULL) break; - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); - if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - #endif - *memid = mi_memid_create(i, block_index); - *commit = true; // TODO: support commit on demand? 
- *large = arena->is_large; - mi_assert_internal((uintptr_t)p % alignment == 0); - return p; + if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); + #if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + #endif + * memid = mi_memid_create(i, block_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + mi_assert_internal((uintptr_t)p % alignment == 0); + return p; + } } } } From 6695f8ae91c30615f009114b818ccfbccee8b122 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 10:59:50 -0700 Subject: [PATCH 015/352] add allow_decommit option --- include/mimalloc.h | 2 +- src/options.c | 6 +----- src/segment.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b63ed79d..a9c339e9 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -261,7 +261,6 @@ typedef enum mi_option_e { mi_option_verbose, // the following options are experimental mi_option_eager_commit, - mi_option_eager_region_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, @@ -269,6 +268,7 @@ typedef enum mi_option_e { mi_option_cache_reset, mi_option_reset_decommits, mi_option_eager_commit_delay, + mi_option_allow_decommit, mi_option_segment_reset, mi_option_os_tag, _mi_option_last diff --git a/src/options.c b/src/options.c index 99e90c46..31af819b 100644 --- a/src/options.c +++ b/src/options.c @@ -53,11 +53,6 @@ static mi_option_desc_t options[_mi_option_last] = // the following options are experimental and not all combinations make sense. { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled - #ifdef _WIN32 // and BSD? - { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) - #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, - #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread @@ -65,6 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; diff --git a/src/segment.c b/src/segment.c index 8a02acac..41cf08d9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -634,7 +634,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m segment->cookie = _mi_ptr_cookie(segment); segment->slice_entries = slice_entries; segment->kind = (required == 0 ? 
MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - segment->allow_decommit = !commit; + segment->allow_decommit = !commit && mi_option_is_enabled(mi_option_allow_decommit); segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); From bbca1cd8d96bde91cd14651cc114dff907d7ed73 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 12:42:23 -0700 Subject: [PATCH 016/352] allow decommit by default --- src/options.c | 2 +- src/segment.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/options.c b/src/options.c index 31af819b..d5771705 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; diff --git a/src/segment.c b/src/segment.c index 41cf08d9..e88d22c5 100644 --- a/src/segment.c +++ b/src/segment.c @@ -383,12 +383,11 @@ static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative uintptr_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 if (bitidx + bitcount > MI_INTPTR_SIZE*8) { - _mi_warning_message("%zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); + _mi_warning_message("commit mask overflow: %zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); } mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); uintptr_t mask = (((uintptr_t)1 << bitcount) - 1) << bitidx; - return mask; } From a74e072a9acb23e76fd0e1a6996bef1ade5ac027 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 19:00:26 -0700 Subject: [PATCH 017/352] set test-stress scale to 20 again --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index f60cda10..08406ec7 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,7 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 40; // scaling factor +static int N = 20; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From 268698b9efaaba6d03ad3d0840494ed63ad74d7f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Nov 2019 08:00:51 -0800 Subject: [PATCH 018/352] fix vs2019 project --- test/test-stress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 07f4a2aa..50cbf9bd 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -17,8 +17,8 @@ terms of the MIT license. 
#include // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 40; // scaling factor +static int THREADS = 8; // more repeatable if THREADS <= #processors +static int N = 200; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From b04206a9d32b18fa1654104548d205c9f2dfb87b Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Nov 2019 10:10:10 -0800 Subject: [PATCH 019/352] add os cache to arena --- include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 2 +- src/arena.c | 115 +++++++++++++++++++++++++++++++++--- src/segment.c | 2 +- test/test-stress.c | 8 +-- 5 files changed, 115 insertions(+), 14 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 10528877..afa265f5 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -68,7 +68,7 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) // arena.c void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats); // "segment.c" diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index cd9c5154..8203bc3b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -108,7 +108,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // 64kb on 64-bit -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/4) // 16mb on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32mb on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // Minimal alignment necessary. On most platforms 16 bytes are needed diff --git a/src/arena.c b/src/arena.c index 93655033..200e1ed7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -52,9 +52,9 @@ int _mi_os_numa_node_count(void); // size in count of arena blocks. 
typedef uintptr_t mi_block_info_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE/2) // 32MiB -#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB +#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_ALIGN // 64MiB +#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 4GiB +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor @@ -118,6 +118,98 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* } +/* ----------------------------------------------------------- + Arena cache +----------------------------------------------------------- */ +#define MI_CACHE_MAX (8) +#define MI_MAX_NUMA (64) + +#define MI_SLOT_IN_USE ((void*)1) + +typedef struct mi_cache_slot_s { + volatile _Atomic(void*) p; + volatile size_t memid; + volatile bool is_committed; + volatile bool is_large; +} mi_cache_slot_t; + +static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; + +static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + // only segment blocks + if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; + + // set numa range + int numa_min = numa_node; + int numa_max = numa_min; + if (numa_node < 0) { + numa_min = 0; + numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; + } + else { + if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + numa_min = numa_max = numa_node; + } + + // find a free slot + mi_cache_slot_t* slot; + for (int n = numa_min; n <= numa_max; n++) { + for (int i = 0; i < MI_CACHE_MAX; i++) { + slot = &cache[n][i]; + void* p = mi_atomic_read_ptr_relaxed(&slot->p); + if (p > MI_SLOT_IN_USE) { // not NULL or 1 + if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { + // claimed + if (!*large && slot->is_large) { + // back out again + mi_atomic_write_ptr(&slot->p, p); // make it available again + } + else { + // keep it + *memid = slot->memid; + *large = slot->is_large; + *is_zero = false; + bool committed = slot->is_committed; + mi_atomic_write_ptr(&slot->p, NULL); // set it free + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + } + *commit = committed; + return p; + } + } + } + } + } + return NULL; +} + +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large) { + // only for segment blocks + if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + + // try to add it to the cache + int numa_node = _mi_os_numa_node(NULL); + if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + mi_cache_slot_t* slot; + for (int i = 0; i < MI_CACHE_MAX; i++) { + slot = &cache[numa_node][i]; + void* p = mi_atomic_read_ptr_relaxed(&slot->p); + if (p == NULL) { // free slot + if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { + // claimed! 
+ slot->memid = memid; + slot->is_committed = is_committed; + slot->is_large = is_large; + mi_atomic_write_ptr(&slot->p, start); // and make it available; + return true; + } + } + } + return false; +} + /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ @@ -148,6 +240,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` + const int numa_node = _mi_os_numa_node(tld); // current numa node + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. if (alignment <= MI_SEGMENT_ALIGN && @@ -155,8 +249,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size >= MI_ARENA_MIN_OBJ_SIZE) { const size_t bcount = mi_block_count_of_size(size); - const int numa_node = _mi_os_numa_node(tld); // current numa node - + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { @@ -184,6 +277,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, } } + // try to get from the cache + void* p = mi_cache_pop(numa_node, size, alignment, commit, large, is_zero, memid, tld); + if (p != NULL) return p; + + // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; @@ -202,13 +300,16 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, siz Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; + if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - _mi_os_free(p, size, stats); + if (!mi_cache_push(p, size, memid, is_committed, is_large)) { + _mi_os_free(p, size, stats); + } } else { // allocated in an arena diff --git a/src/segment.c b/src/segment.c index 99e382bc..54a0c8fe 100644 --- a/src/segment.c +++ b/src/segment.c @@ -284,7 +284,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_committed || (~segment->commit_mask == 0), segment->mem_is_fixed, tld->stats); } diff --git a/test/test-stress.c b/test/test-stress.c index 50cbf9bd..3aa65f41 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------------- + /* ---------------------------------------------------------------------------- Copyright (c) 2018,2019 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. 
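The OS cache added to `arena.c` in this commit keeps a small per-NUMA-node table of recently freed
segment-sized blocks. Each slot's `p` field doubles as a tiny state machine: `NULL` means the slot is
empty, `MI_SLOT_IN_USE` (`(void*)1`) means another thread is busy filling or emptying it, and any other
value is an available block. A rough sketch of the pop-side claim, using only names introduced above
(the helper name is hypothetical; the real `mi_cache_pop` additionally handles NUMA node ranges and
commit-on-demand):

static void* mi_cache_slot_try_pop(mi_cache_slot_t* slot, bool allow_large) {
  void* p = mi_atomic_read_ptr_relaxed(&slot->p);
  if (p == NULL || p == MI_SLOT_IN_USE) return NULL;                      // empty, or currently being updated
  if (!mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) return NULL;  // lost the race to claim it
  if (!allow_large && slot->is_large) {
    mi_atomic_write_ptr(&slot->p, p);                                     // incompatible block: publish it again
    return NULL;
  }
  // the slot is "locked" here, so memid/is_committed/is_large can be copied out safely
  mi_atomic_write_ptr(&slot->p, NULL);                                    // mark the slot empty for future pushes
  return p;                                                               // the caller now owns the cached block
}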
@@ -64,9 +64,9 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1,r)) items *= 1000; // 0.01% giant - else if (chance(10,r)) items *= 100; // 0.1% huge - else items *= 10; // 1% large objects; + if (chance(1, r)) items *= 1000; // 0.01% giant + else if (chance(10, r)) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); From 321e18777e1dcf7214d84bb22ed537af7e4832ba Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 19:53:43 -0800 Subject: [PATCH 020/352] wip: delayed decommit on segments --- include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 14 +++--- src/arena.c | 13 ++--- src/options.c | 10 ++-- src/segment.c | 95 ++++++++++++++++++++++++++++++++----- 5 files changed, 103 insertions(+), 31 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index a1f7f870..7ce8d52b 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -66,7 +66,7 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) // arena.c void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld); // "segment.c" diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f7eafd39..d4c1e1c1 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -235,6 +235,9 @@ typedef enum mi_segment_kind_e { typedef mi_page_t mi_slice_t; +typedef int64_t mi_msecs_t; + + // Segments are large allocated memory blocks (2mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. @@ -243,6 +246,11 @@ typedef struct mi_segment_s { bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed + bool allow_decommit; + mi_msecs_t decommit_expire; + uintptr_t decommit_mask; + uintptr_t commit_mask; + // from here is zero initialized struct mi_segment_s* next; // the list of freed segments in the cache volatile _Atomic(struct mi_segment_s*) abandoned_next; @@ -254,9 +262,6 @@ typedef struct mi_segment_s { size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. 
- bool allow_decommit; - uintptr_t commit_mask; - // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment @@ -415,9 +420,6 @@ typedef struct mi_span_queue_s { #define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) -typedef int64_t mi_msecs_t; - - // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation diff --git a/src/arena.c b/src/arena.c index eecbc298..4602c42c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,7 +188,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co return NULL; } -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large) { +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -202,8 +202,9 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit if (p == NULL) { // free slot if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! - slot->memid = memid; + // _mi_os_decommit(start, size, tld->stats); slot->is_committed = is_committed; + slot->memid = memid; slot->is_large = is_large; mi_atomic_write_ptr(&slot->p, start); // and make it available; return true; @@ -317,15 +318,15 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, siz Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - if (!mi_cache_push(p, size, memid, is_committed, is_large)) { - _mi_os_free(p, size, stats); + if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) { + _mi_os_free(p, size, tld->stats); } } else { diff --git a/src/options.c b/src/options.c index 03ee3e0c..f1d94c80 100644 --- a/src/options.c +++ b/src/options.c @@ -56,7 +56,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. 
- { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled + { 0, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread @@ -65,10 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed - { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. - { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/segment.c b/src/segment.c index 473cd696..a644708a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,7 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_map_allocated_at(const mi_segment_t* segment); static void mi_segment_map_freed_at(const mi_segment_t* segment); - +static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); /* ----------------------------------------------------------- Segment allocation @@ -286,8 +286,12 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } + // purge delayed decommits now + mi_segment_delayed_decommit(segment,true,tld->stats); + // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_committed || (~segment->commit_mask == 0), segment->mem_is_fixed, tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, + (~segment->commit_mask == 0 && segment->decommit_mask == 0), segment->mem_is_fixed, tld->os); } @@ -331,7 +335,8 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) if (segment->segment_slices != MI_SLICES_PER_SEGMENT || mi_segment_cache_full(tld)) { return false; } - + // mi_segment_delayed_decommit(segment, true, tld->stats); + // segment->decommit_mask = 0; mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT); mi_assert_internal(segment->next == NULL); segment->next = tld->cache; @@ -395,29 +400,79 @@ static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s // commit liberal, but decommit conservative uint8_t* start; size_t full_size; - uintptr_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); + uintptr_t mask = 
mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); if (mask==0 || full_size==0) return; if (commit && (segment->commit_mask & mask) != mask) { bool is_zero = false; _mi_os_commit(start,full_size,&is_zero,stats); - segment->commit_mask |= mask; + segment->commit_mask |= mask; } else if (!commit && (segment->commit_mask & mask) != 0) { + mi_assert_internal((void*)start != (void*)segment); _mi_os_decommit(start, full_size,stats); segment->commit_mask &= ~mask; } + // increase expiration of reusing part of the delayed decommit + if (commit && (segment->decommit_mask & mask) != 0) { + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + } + // always undo delayed decommits + segment->decommit_mask &= ~mask; } static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (~segment->commit_mask == 0) return; // fully committed + if (~segment->commit_mask == 0 && segment->decommit_mask==0) return; // fully committed mi_segment_commitx(segment,true,p,size,stats); } static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_decommit) return; // TODO: check option_decommit? if (segment->commit_mask == 1) return; // fully decommitted - mi_segment_commitx(segment, false, p, size, stats); + if (mi_option_get(mi_option_reset_delay) == 0) { + mi_segment_commitx(segment, false, p, size, stats); + } + else { + // create mask + uint8_t* start; + size_t full_size; + uintptr_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); + if (mask==0 || full_size==0) return; + + // update delayed commit + segment->decommit_mask |= mask; + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + } +} + +static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { + if (segment->decommit_mask == 0) return; + mi_msecs_t now = _mi_clock_now(); + if (!force && now < segment->decommit_expire) return; + + uintptr_t mask = segment->decommit_mask; + segment->decommit_expire = 0; + segment->decommit_mask = 0; + + uintptr_t idx = 0; + while (mask != 0) { + // count ones + size_t count = 0; + while ((mask&1)==1) { + mask >>= 1; + count++; + } + // if found, decommit that sequence + if (count > 0) { + uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); + size_t size = count * MI_COMMIT_SIZE; + mi_segment_commitx(segment, false, p, size, stats); + idx += count; + } + // shift out the 0 + mask >>= 1; + idx++; + } } static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { @@ -440,7 +495,7 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size // perhaps decommit mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); - + // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); else slice->block_size = 0; // mark huge page as free anyways @@ -599,6 +654,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m // Try to get from our cache first mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); bool is_zero = false; + bool commit_info_still_good = (segment != NULL); if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we 
are no longer lazy @@ -614,7 +670,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = mi_option_is_enabled(mi_option_eager_commit); // commit; mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } @@ -625,7 +681,14 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m size_t prefix = offsetof(mi_segment_t, slices) - ofs; memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); } - + + if (!commit_info_still_good) { + segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed + segment->allow_decommit = mi_option_is_enabled(mi_option_allow_decommit); + segment->decommit_expire = 0; + segment->decommit_mask = 0; + } + // initialize segment info segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; @@ -633,8 +696,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m segment->cookie = _mi_ptr_cookie(segment); segment->slice_entries = slice_entries; segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - segment->allow_decommit = !commit && mi_option_is_enabled(mi_option_allow_decommit); - segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed + // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); @@ -723,6 +785,7 @@ static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t requir } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); + mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); return page; } @@ -799,7 +862,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - + // remove the free pages from our lists mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -813,6 +876,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } + // force delayed decommits + mi_segment_delayed_decommit(segment, true, tld->stats); + //segment->decommit_mask = 0; + // add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)mi_segment_size(segment)), tld); @@ -866,6 +933,8 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segments_track_size((long)mi_segment_size(segment),tld); mi_assert_internal(segment->next == NULL); _mi_stat_decrease(&tld->stats->segments_abandoned,1); + mi_assert_internal(segment->decommit_mask == 0); + mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); From 7da00c1220f77120f10e975dba881c93cb21626a Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 20:57:32 -0800 Subject: [PATCH 021/352] wip: full decommit delay, for arena cache as well --- include/mimalloc.h | 1 + src/arena.c | 55 +++++++++++++++++++++++++++++++++++++++++----- src/options.c | 5 +++-- src/segment.c | 12 +++++----- 
4 files changed, 58 insertions(+), 15 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 56d54d94..2c69514b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,6 +275,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_allow_decommit, mi_option_reset_delay, + mi_option_arena_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/arena.c b/src/arena.c index 4602c42c..c1b47073 100644 --- a/src/arena.c +++ b/src/arena.c @@ -124,16 +124,17 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (64) +#define MI_CACHE_MAX (64) // ~4GiB #define MI_MAX_NUMA (16) #define MI_SLOT_IN_USE ((void*)1) typedef struct mi_cache_slot_s { volatile _Atomic(void*) p; - volatile size_t memid; - volatile bool is_committed; - volatile bool is_large; + volatile size_t memid; + volatile mi_msecs_t expire; + volatile bool is_committed; + volatile bool is_large; } mi_cache_slot_t; static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; @@ -188,7 +189,43 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co return NULL; } -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { +static void mi_cache_purge(mi_os_tld_t* tld) { + // TODO: for each numa node instead? + // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; + + mi_msecs_t now = _mi_clock_now(); + int numa_node = _mi_os_numa_node(NULL); + if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + mi_cache_slot_t* slot; + int purged = 0; + for (int i = 0; i < MI_CACHE_MAX; i++) { + slot = &cache[numa_node][i]; + void* p = mi_atomic_read_ptr_relaxed(&slot->p); + if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { + mi_msecs_t expire = slot->expire; + if (now >= expire) { + // expired, try to claim it + if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { + // claimed! test again + if (!slot->is_committed && !slot->is_large && now >= slot->expire) { + _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; + } + // and unclaim again + mi_atomic_write_ptr(&slot->p, p); + purged++; + if (purged >= 4) break; // limit to at most 4 decommits per push + } + } + } + } +} + + +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) +{ + mi_cache_purge(tld); + // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -202,7 +239,12 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit if (p == NULL) { // free slot if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! - // _mi_os_decommit(start, size, tld->stats); + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0 && !is_large) { + _mi_os_decommit(start, size, tld->stats); + is_committed = false; + } + slot->expire = (is_committed ? 
0 : _mi_clock_now() + delay); slot->is_committed = is_committed; slot->memid = memid; slot->is_large = is_large; @@ -214,6 +256,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit return false; } + /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ diff --git a/src/options.c b/src/options.c index f1d94c80..dcee89e6 100644 --- a/src/options.c +++ b/src/options.c @@ -64,8 +64,9 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed - { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index a644708a..247ce28d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -286,8 +286,8 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set } - // purge delayed decommits now - mi_segment_delayed_decommit(segment,true,tld->stats); + // purge delayed decommits now? (no, leave it to the cache) + // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); _mi_arena_free(segment, mi_segment_size(segment), segment->memid, @@ -335,8 +335,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) if (segment->segment_slices != MI_SLICES_PER_SEGMENT || mi_segment_cache_full(tld)) { return false; } - // mi_segment_delayed_decommit(segment, true, tld->stats); - // segment->decommit_mask = 0; + // mi_segment_delayed_decommit(segment, true, tld->stats); mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT); mi_assert_internal(segment->next == NULL); segment->next = tld->cache; @@ -876,9 +875,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // force delayed decommits - mi_segment_delayed_decommit(segment, true, tld->stats); - //segment->decommit_mask = 0; + // force delayed decommits instead? 
+ mi_segment_delayed_decommit(segment, false, tld->stats); // add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); From ec0005b91978171fa8124e3567da7fda070cb6a8 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 19:06:30 -0800 Subject: [PATCH 022/352] more fine grained commit tracking per MiB --- include/mimalloc-types.h | 2 +- src/arena.c | 2 +- src/options.c | 2 +- src/segment.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d4c1e1c1..3cdc4963 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -227,7 +227,7 @@ typedef enum mi_segment_kind_e { MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. } mi_segment_kind_t; -#define MI_COMMIT_SIZE (2UL<<20) // OS large page size +#define MI_COMMIT_SIZE (1UL<<20) // OS large page size #if ((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE > 8*MI_INTPTR_SIZE) #error "not enough commit bits to cover the segment size" diff --git a/src/arena.c b/src/arena.c index c1b47073..f3dd690f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -203,7 +203,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { void* p = mi_atomic_read_ptr_relaxed(&slot->p); if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { mi_msecs_t expire = slot->expire; - if (now >= expire) { + if (expire != 0 && now >= expire) { // expired, try to claim it if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { // claimed! test again diff --git a/src/options.c b/src/options.c index 6fd887a3..dcee89e6 100644 --- a/src/options.c +++ b/src/options.c @@ -64,7 +64,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/segment.c b/src/segment.c index 042ce2b7..d0580b74 100644 --- a/src/segment.c +++ b/src/segment.c @@ -381,6 +381,7 @@ static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); *start_p = (uint8_t*)segment + start; *full_size = (end > start ? 
end - start : 0); + if (*full_size == 0) return 0; uintptr_t bitidx = start / MI_COMMIT_SIZE; mi_assert_internal(bitidx < (MI_INTPTR_SIZE*8)); @@ -931,8 +932,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segments_track_size((long)mi_segment_size(segment),tld); mi_assert_internal(segment->next == NULL); _mi_stat_decrease(&tld->stats->segments_abandoned,1); - mi_assert_internal(segment->decommit_mask == 0); - + //mi_assert_internal(segment->decommit_mask == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); From 41af533a344f78858dc934e9e2994836e81adab3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 19:17:56 -0800 Subject: [PATCH 023/352] define commit unit in terms of segment size --- include/mimalloc-types.h | 13 +++++++------ src/options.c | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 3cdc4963..9e183ca5 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -76,6 +76,7 @@ terms of the MIT license. A copy of the license can be found in the file #endif #define MI_INTPTR_SIZE (1< MI_LARGE_SIZE_MAX segment with just one huge page inside. } mi_segment_kind_t; -#define MI_COMMIT_SIZE (1UL<<20) // OS large page size +#define MI_COMMIT_SIZE (MI_SEGMENT_SIZE/MI_INTPTR_BITS) -#if ((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE > 8*MI_INTPTR_SIZE) +#if (((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE) > 8*MI_INTPTR_SIZE) #error "not enough commit bits to cover the segment size" #endif diff --git a/src/options.c b/src/options.c index dcee89e6..6fd887a3 100644 --- a/src/options.c +++ b/src/options.c @@ -64,7 +64,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 1000, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. From 2808c9f4c871a32752d8e6e32fc3841cd1a0fd2e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 18:01:52 -0800 Subject: [PATCH 024/352] default to non-eager commit --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index d3d9f9be..328fb86e 100644 --- a/src/options.c +++ b/src/options.c @@ -56,7 +56,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand + { 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory From 94bff89347715de069cce2345d1a57f6045a131b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 20:48:18 -0800 Subject: [PATCH 025/352] ensure page reset flag is always reset --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index d94bc894..734ca1c7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -602,6 +602,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i // ensure the memory is committed mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + page->is_reset = false; segment->used++; return page; } From 88b141cf1fb1ff9ba3fd033ac9f5e6b7eae4d919 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 20:48:37 -0800 Subject: [PATCH 026/352] ensure proper padding for the page structure --- include/mimalloc-types.h | 6 +++--- src/init.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b0b4a44f..ec382b5e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -211,9 +211,9 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words - #if (MI_INTPTR_SIZE==4) - void* padding[1]; // 12/14 words on 32-bit plain + // without padding: 11 words on 64-bit, 14 on 32-bit. Secure adds two words + #if (MI_INTPTR_SIZE==8) + void* padding[1]; // 12/14 words on 64-bit #endif } mi_page_t; diff --git a/src/init.c b/src/init.c index 1409faaa..51c18d93 100644 --- a/src/init.c +++ b/src/init.c @@ -26,7 +26,7 @@ const mi_page_t _mi_page_empty = { NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) + #if (MI_INTPTR_SIZE==8) , { NULL } // padding #endif }; From 0028272cf4b39aa27e44b6fd0973744604beb6d7 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 22:33:29 -0800 Subject: [PATCH 027/352] small fixes, reduced segment size, fix merge conflicts --- include/mimalloc-types.h | 2 +- src/alloc.c | 1 + src/arena.c | 30 ++++++++++++++++++++---------- src/options.c | 2 +- src/page.c | 19 ++++++++++++++++--- src/segment.c | 28 ++++++---------------------- src/static.c | 4 ---- 7 files changed, 45 insertions(+), 41 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index a5fd12b8..6685b5a7 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -89,7 +89,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb +#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb diff --git a/src/alloc.c b/src/alloc.c index de8bd3d2..6370b19d 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -195,6 +195,7 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag mi_tld_t* tld = heap->tld; const size_t bsize = mi_page_block_size(page); if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_assert_internal(false); _mi_stat_decrease(&tld->stats.large, bsize); } else { diff --git a/src/arena.c b/src/arena.c index 104a7e83..4fb1364a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,3 +1,4 @@ + /* ---------------------------------------------------------------------------- Copyright (c) 2019, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the @@ -36,7 +37,8 @@ of 256MiB in practice. // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); +// void _mi_os_free(void* p, size_t size, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); @@ -178,8 +180,11 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; } - *commit = committed; + else { + *commit = committed; + } return p; } } @@ -207,7 +212,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { // expired, try to claim it if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { // claimed! test again - if (!slot->is_committed && !slot->is_large && now >= slot->expire) { + if (slot->is_committed && !slot->is_large && now >= slot->expire) { _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); slot->is_committed = false; } @@ -239,15 +244,20 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit if (p == NULL) { // free slot if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0 && !is_large) { - _mi_os_decommit(start, size, tld->stats); - is_committed = false; - } - slot->expire = (is_committed ? 
0 : _mi_clock_now() + delay); + slot->expire = 0; slot->is_committed = is_committed; slot->memid = memid; slot->is_large = is_large; + if (is_committed) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0 && !is_large) { + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; + } + } mi_atomic_write_ptr(&slot->p, start); // and make it available; return true; } @@ -369,7 +379,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) { - _mi_os_free(p, size, tld->stats); + _mi_os_free_ex(p, size, is_committed, tld->stats); } } else { diff --git a/src/options.c b/src/options.c index 489f07b3..1130e2e3 100644 --- a/src/options.c +++ b/src/options.c @@ -71,7 +71,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/page.c b/src/page.c index 44f32a73..13706100 100644 --- a/src/page.c +++ b/src/page.c @@ -378,9 +378,22 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { // no more aligned blocks in here mi_page_set_has_aligned(page, false); + mi_heap_t* heap = mi_page_heap(page); + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.large, bsize); + } + else { + // not strictly necessary as we never get here for a huge page + mi_assert_internal(false); + _mi_stat_decrease(&heap->tld->stats.huge, bsize); + } + } + // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) - mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments; + mi_segments_tld_t* segments_tld = &heap->tld->segments; mi_page_queue_remove(pq, page); // and free it @@ -769,11 +782,11 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); } if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.large, block_size); + _mi_stat_increase(&heap->tld->stats.large, bsize); _mi_stat_counter_increase(&heap->tld->stats.large_count, 1); } else { - _mi_stat_increase(&heap->tld->stats.huge, block_size); + _mi_stat_increase(&heap->tld->stats.huge, bsize); _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); } } diff --git a/src/segment.c b/src/segment.c index 5ce4d7ba..b3a33d60 100644 --- a/src/segment.c +++ b/src/segment.c @@ -7,6 +7,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include "bitmap.inc.c" // mi_bsr #include // memset #include @@ -49,23 +50,7 @@ static uint8_t* mi_slice_start(const mi_slice_t* slice) { Bins ----------------------------------------------------------- */ // Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#include -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanReverse64(&idx, x); - #else - _BitScanReverse(&idx, x); - #endif - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); -} -#else +#if !defined(MI_HAVE_BITSCAN) #error "define bsr for your platform" #endif @@ -410,7 +395,7 @@ static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } else if (!commit && (segment->commit_mask & mask) != 0) { mi_assert_internal((void*)start != (void*)segment); - _mi_os_decommit(start, full_size,stats); + _mi_os_decommit(start, full_size, stats); segment->commit_mask &= ~mask; } // increase expiration of reusing part of the delayed decommit @@ -902,8 +887,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // force delayed decommits instead? - mi_segment_delayed_decommit(segment, false, tld->stats); + // perform delayed decommits instead + mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1018,7 +1003,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (segment->used == 0) { // due to page_clear mi_segment_free(segment,false,tld); } - + // go on segment = next; } @@ -1185,6 +1170,5 @@ static void* mi_segment_range_of(const void* p, size_t* size) { mi_reset_delayed(tld); mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; ->>>>>>> dev } */ diff --git a/src/static.c b/src/static.c index bcfaa119..b3c71e02 100644 --- a/src/static.c +++ b/src/static.c @@ -16,12 +16,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "stats.c" #include "random.c" #include "os.c" -<<<<<<< HEAD //#include "memory.c" -======= #include "arena.c" -#include "memory.c" ->>>>>>> dev #include "segment.c" #include "page.c" #include "heap.c" From caa5e51a67dd7c1a6efe0393a4f78986d2c9e547 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 22 Jan 2020 11:29:32 -0800 Subject: [PATCH 028/352] align size of page_t, increase slices per segment --- include/mimalloc-types.h | 32 ++++++++++++++++++-------------- src/init.c | 3 +++ src/page.c | 4 ++-- src/segment.c | 1 + 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 6685b5a7..661e2856 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -89,7 +89,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 64mb +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 64mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb @@ -104,7 +104,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SMALL_PAGE_SIZE (1ULL<0`) - mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads volatile _Atomic(uintptr_t) xheap; - - struct mi_page_s* next; // next page owned by this thread with the same `block_size` - struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + struct mi_page_s* next; // next page owned by this thread with the same `block_size` + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + + // 64-bit 9 words, 32-bit 12 words, (+2 for secure) + #if MI_INTPTR_SIZE==8 + uintptr_t padding[1]; + #endif } mi_page_t; diff --git a/src/init.c b/src/init.c index a0873615..e77185ff 100644 --- a/src/init.c +++ b/src/init.c @@ -28,6 +28,9 @@ const mi_page_t _mi_page_empty = { ATOMIC_VAR_INIT(0), // xthread_free ATOMIC_VAR_INIT(0), // xheap NULL, NULL + #if MI_INTPTR_SIZE==8 + , { 0 } // padding + #endif }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) diff --git a/src/page.c b/src/page.c index 13706100..5b2a85f7 100644 --- a/src/page.c +++ b/src/page.c @@ -74,10 +74,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); - const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); + //const size_t bsize = mi_page_block_size(page); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -86,7 +86,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { #if MI_DEBUG>3 // generally too expensive to check this if (page->flags.is_zero) { for(mi_block_t* block = page->free; block != NULL; mi_block_next(page,block)) { - mi_assert_expensive(mi_mem_is_zero(block + 1, page->block_size - sizeof(mi_block_t))); + mi_assert_expensive(mi_mem_is_zero(block + 1, bsize - sizeof(mi_block_t))); } } #endif diff --git a/src/segment.c b/src/segment.c index b3a33d60..22757968 100644 --- a/src/segment.c +++ b/src/segment.c @@ -458,6 +458,7 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st mask >>= 1; idx++; } + mi_assert_internal(segment->decommit_mask == 0); } static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { From a46d20a681cbae2d5e353974db9331b7d84c8eed Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 20:53:44 -0800 Subject: [PATCH 029/352] merge with new atomic macros --- include/mimalloc-types.h | 2 +- src/arena.c | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 
deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 661e2856..b65bf266 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -89,7 +89,7 @@ terms of the MIT license. A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 64mb +#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 16mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb diff --git a/src/arena.c b/src/arena.c index fe943e07..167cf751 100644 --- a/src/arena.c +++ b/src/arena.c @@ -126,7 +126,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (64) // ~4GiB +#define MI_CACHE_MAX (128) // ~4GiB #define MI_MAX_NUMA (16) #define MI_SLOT_IN_USE ((void*)1) @@ -162,13 +162,13 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co for (int n = numa_min; n <= numa_max; n++) { for (int i = 0; i < MI_CACHE_MAX; i++) { slot = &cache[n][i]; - void* p = mi_atomic_read_ptr_relaxed(&slot->p); + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p); if (p > MI_SLOT_IN_USE) { // not NULL or 1 - if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { // claimed if (!*large && slot->is_large) { // back out again - mi_atomic_write_ptr(&slot->p, p); // make it available again + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again } else { // keep it @@ -176,7 +176,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co *large = slot->is_large; *is_zero = false; bool committed = slot->is_committed; - mi_atomic_write_ptr(&slot->p, NULL); // set it free + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); @@ -205,19 +205,19 @@ static void mi_cache_purge(mi_os_tld_t* tld) { int purged = 0; for (int i = 0; i < MI_CACHE_MAX; i++) { slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(&slot->p); + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { mi_msecs_t expire = slot->expire; if (expire != 0 && now >= expire) { // expired, try to claim it - if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, p)) { + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { // claimed! 
test again if (slot->is_committed && !slot->is_large && now >= slot->expire) { _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); slot->is_committed = false; } // and unclaim again - mi_atomic_write_ptr(&slot->p, p); + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); purged++; if (purged >= 4) break; // limit to at most 4 decommits per push } @@ -240,9 +240,9 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit mi_cache_slot_t* slot; for (int i = 0; i < MI_CACHE_MAX; i++) { slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(&slot->p); + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); if (p == NULL) { // free slot - if (mi_atomic_cas_ptr_weak(&slot->p, MI_SLOT_IN_USE, NULL)) { + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! slot->expire = 0; slot->is_committed = is_committed; @@ -258,7 +258,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit slot->expire = _mi_clock_now() + delay; } } - mi_atomic_write_ptr(&slot->p, start); // and make it available; + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; return true; } } From 09b98e0f7fee183df0e627ddcedcd9b870b156cf Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 27 Jan 2020 22:14:10 -0800 Subject: [PATCH 030/352] merge from dev-exp; resolve conflicts --- CMakeLists.txt | 4 ---- src/static.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f332156d..ab3946ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,10 +21,6 @@ set(mi_sources src/random.c src/os.c src/arena.c -<<<<<<< HEAD -======= - src/region.c ->>>>>>> dev-exp src/segment.c src/page.c src/alloc.c diff --git a/src/static.c b/src/static.c index 302fa72e..b3c71e02 100644 --- a/src/static.c +++ b/src/static.c @@ -18,10 +18,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "os.c" //#include "memory.c" #include "arena.c" -<<<<<<< HEAD -======= -#include "region.c" ->>>>>>> dev-exp #include "segment.c" #include "page.c" #include "heap.c" From 54e206a0a1f6aac0071a35ff5279d6c035a68b35 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 27 Jan 2020 22:41:24 -0800 Subject: [PATCH 031/352] increase retire page size --- src/page.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index cb193579..dcd39ed7 100644 --- a/src/page.c +++ b/src/page.c @@ -393,6 +393,9 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } + +#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX) + // Retire a page with no more used blocks // Important to not retire too quickly though as new // allocations might coming. @@ -413,7 +416,7 @@ void _mi_page_retire(mi_page_t* page) { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { + if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 16; @@ -427,7 +430,7 @@ void _mi_page_retire(mi_page_t* page) { // free retired pages: we don't need to look at the entire queues // since we only retire pages that are the last one in a queue. 
void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { - for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { + for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_MAX_RETIRE_SIZE; pq++) { mi_page_t* page = pq->first; if (page != NULL && page->retire_expire != 0) { if (mi_page_all_free(page)) { @@ -684,7 +687,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { - _mi_heap_collect_retired(heap, false); // perhaps make a page available + _mi_heap_collect_retired(heap, false); // perhaps make a page available? page = mi_page_fresh(heap, pq); if (page == NULL && first_try) { // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again From d221a4b9049344758e31850cf0f2716b5e6ff7e3 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 27 Jan 2020 23:36:53 -0800 Subject: [PATCH 032/352] merge from dev-exp --- src/page.c | 3 +-- src/region.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index dcd39ed7..42dfdfbf 100644 --- a/src/page.c +++ b/src/page.c @@ -393,8 +393,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } - -#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX) +#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX) // Retire a page with no more used blocks // Important to not retire too quickly though as new diff --git a/src/region.c b/src/region.c index 92758229..fd7d4544 100644 --- a/src/region.c +++ b/src/region.c @@ -284,7 +284,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(!(info.x.is_large && !*is_large)); mi_assert_internal(start != NULL); - *is_zero = mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); + *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); *is_large = info.x.is_large; *memid = mi_memid_create(region, bit_idx); void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); From f86519bca6ca2e3730cc2e0e27499729f1af816d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 28 Apr 2020 16:46:00 -0700 Subject: [PATCH 033/352] make lazy commit default; add commit check on segment allocation --- src/options.c | 4 ++-- src/segment.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/options.c b/src/options.c index c9c92003..89048c7d 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand? + { 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand? #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory @@ -79,7 +79,7 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #endif - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/segment.c b/src/segment.c index b8e5f2ec..ed587d5b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -671,7 +671,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!commit) { // at least commit the info slices mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); - _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); + bool ok = _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); + if (!ok) return NULL; // failed to commit } segment->memid = memid; segment->mem_is_fixed = mem_large; From 0d25493c39a616b13fe59b83413d4bda3fff0afe Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 28 Apr 2020 16:50:03 -0700 Subject: [PATCH 034/352] segment size to 16MiB to improve perf on mstress and rptest --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index cf94809b..a2c3fa9a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -100,7 +100,7 @@ terms of the MIT license. A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 8mb +#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 8mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb From c609248f0ee2f7daff898d8da516493aea1b1a34 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 30 Apr 2020 13:30:19 -0700 Subject: [PATCH 035/352] do delayed decommit if not reclaiming abandoned blocks --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index ed587d5b..ba7cf687 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1208,6 +1208,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again + mi_segment_delayed_decommit(segment, false, tld->stats); // decommit if needed mi_abandoned_visited_push(segment); } } From dad3be3c645a5e8844df50c38fc9c50d0cc88d6a Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 30 Apr 2020 17:21:36 -0700 Subject: [PATCH 036/352] update comments --- include/mimalloc-types.h | 2 +- src/arena.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index a2c3fa9a..12a420c2 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -100,7 +100,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 8mb +#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 16mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb diff --git a/src/arena.c b/src/arena.c index 99eb766c..77616580 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,7 +127,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (128) // ~4GiB +#define MI_CACHE_MAX (128) #define MI_MAX_NUMA (16) #define MI_SLOT_IN_USE ((void*)1) From dcb3574cf05c66ca141d86b3ad33089495f9fbca Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 May 2020 21:14:41 -0700 Subject: [PATCH 037/352] fix assertions for huge segment free --- include/mimalloc-internal.h | 1 + src/alloc.c | 31 +------------------------------ src/segment.c | 13 ++++++++----- 3 files changed, 10 insertions(+), 35 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e264751f..01be32c8 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -82,6 +82,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); diff --git a/src/alloc.c b/src/alloc.c index 2ee8b720..b948071b 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -286,35 +286,6 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co // Free // ------------------------------------------------------ -// free huge block from another thread -static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { - // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(segment->kind==MI_SEGMENT_HUGE); - mi_assert_internal(segment == _mi_page_segment(page)); - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); - - // claim it and free - mi_heap_t* heap = mi_get_default_heap(); - // paranoia: if this it the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; - page->is_zero = false; - mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - const size_t bsize = mi_page_block_size(page); - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_assert_internal(false); - _mi_stat_decrease(&tld->stats.large, bsize); - } - else { - _mi_stat_decrease(&tld->stats.huge, bsize); - } - _mi_segment_page_free(page, true, &tld->segments); - } -} - // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { @@ -329,7 +300,7 @@ 
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // huge page segments are always abandoned and can be freed immediately mi_segment_t* segment = _mi_page_segment(page); if (segment->kind==MI_SEGMENT_HUGE) { - mi_free_huge_block_mt(segment, page, block); + _mi_segment_huge_page_free(segment, page, block); return; } diff --git a/src/segment.c b/src/segment.c index ba7cf687..cd239931 100644 --- a/src/segment.c +++ b/src/segment.c @@ -166,8 +166,8 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_slice_t* last = &segment->slices[maxindex]; mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); mi_assert_internal(slice == last || last->slice_count == 0 ); - mi_assert_internal(last->xblock_size == 0); - if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { // segment is not huge or abandonded + mi_assert_internal(last->xblock_size == 0 || (segment->kind==MI_SEGMENT_HUGE && last->xblock_size==1)); + if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandonded sq = mi_span_queue_for(slice->slice_count,tld); mi_assert_internal(mi_span_queue_contains(sq,slice)); } @@ -525,8 +525,10 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { - mi_assert_internal(segment->used == 0); + mi_assert_internal(segment->used == 1); // decreased right after this call in `mi_segment_page_clear` slice->xblock_size = 0; // mark as free anyways + // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to + // avoid a possible cache miss (and the segment is about to be freed) return slice; } @@ -1022,8 +1024,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // perform delayed decommits instead - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld->stats); + // perform delayed decommits + mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset) /* force? 
*/, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1297,6 +1299,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld // free huge block from another thread void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { // huge page segments are always abandoned and can be freed immediately by any thread + mi_assert_internal(segment->kind==MI_SEGMENT_HUGE); mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); From 8bfd5ec865a2f6f1a7d237092daa43c93aec5e2c Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 May 2020 23:00:17 -0700 Subject: [PATCH 038/352] improve arena cache to avoid full scans --- src/arena.c | 54 ++++++++++++++++++++++++++++++++++++++++++++------- src/options.c | 2 +- src/segment.c | 4 ++-- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/arena.c b/src/arena.c index 77616580..3f90a07d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,8 +127,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (128) -#define MI_MAX_NUMA (16) +#define MI_CACHE_MAX (256) +#define MI_MAX_NUMA (8) #define MI_SLOT_IN_USE ((void*)1) @@ -140,7 +140,20 @@ typedef struct mi_cache_slot_s { volatile bool is_large; } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; +static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 +static volatile _Atomic(uintptr_t) cache_count[MI_MAX_NUMA]; // = 0 + +typedef union mi_cache_count_u { + uintptr_t value; + struct { + int16_t count; // at most `count` elements in the cache +#if MI_INTPTR_SIZE > 4 + uint32_t epoch; // each push/pop increase this +#else + uint16_t epoch; +#endif + } x; +} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks @@ -161,10 +174,23 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co // find a free slot mi_cache_slot_t* slot; for (int n = numa_min; n <= numa_max; n++) { - for (int i = 0; i < MI_CACHE_MAX; i++) { + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[n]); + int16_t count = top.x.count; + for (int16_t i = count - 1; i >= 0; i--) { slot = &cache[n][i]; void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p); - if (p > MI_SLOT_IN_USE) { // not NULL or 1 + if (p == NULL) { + if (count > 0) { count = i; } + } + else if (p > MI_SLOT_IN_USE) { // not NULL or 1 + if (count >= 0 && count < top.x.count) { // new lower bound? 
+ mi_cache_count_t newtop = { 0 }; + newtop.x.count = count; + newtop.x.epoch = top.x.epoch + 1; + mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans + } + count = -1; // don't try to set lower bound again if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { // claimed if (!*large && slot->is_large) { @@ -204,7 +230,9 @@ static void mi_cache_purge(mi_os_tld_t* tld) { if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; mi_cache_slot_t* slot; int purged = 0; - for (int i = 0; i < MI_CACHE_MAX; i++) { + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + for (int i = 0; i < top.x.count; i++) { slot = &cache[numa_node][i]; void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { @@ -240,12 +268,24 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit int numa_node = _mi_os_numa_node(NULL); if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; mi_cache_slot_t* slot; - for (int i = 0; i < MI_CACHE_MAX; i++) { + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { slot = &cache[numa_node][i]; void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); if (p == NULL) { // free slot if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { // claimed! + // first try to increase the top bound + mi_cache_count_t newtop = { 0 }; + newtop.x.count = i+1; + newtop.x.epoch = top.x.epoch + 1; + while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + if (top.x.count > newtop.x.count) break; // another push max'd it + newtop.x.epoch = top.x.epoch + 1; // otherwise try again + } + // set the slot slot->expire = 0; slot->is_committed = is_committed; slot->memid = memid; diff --git a/src/options.c b/src/options.c index 89048c7d..767a7c35 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand? + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand? #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory diff --git a/src/segment.c b/src/segment.c index cd239931..5cf1598d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -960,6 +960,7 @@ static void mi_abandoned_push(mi_segment_t* segment) { } // Wait until there are no more pending reads on segments that used to be in the abandoned list +// called for example from `arena.c` before decommitting void _mi_abandoned_await_readers(void) { uintptr_t n; do { @@ -982,8 +983,7 @@ static mi_segment_t* mi_abandoned_pop(void) { // Do a pop. We use a reader count to prevent // a segment to be decommitted while a read is still pending, - // and a tagged pointer to prevent A-B-A link corruption. - // (this is called from `memory.c:_mi_mem_free` for example) + // and a tagged pointer to prevent A-B-A link corruption. 
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; do { From 79da2728c42bbc59922b4df7decdaa9f7fb99501 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 00:12:45 -0700 Subject: [PATCH 039/352] reduce cache --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 3f90a07d..55f09a60 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,7 +128,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena cache ----------------------------------------------------------- */ #define MI_CACHE_MAX (256) -#define MI_MAX_NUMA (8) +#define MI_MAX_NUMA (4) #define MI_SLOT_IN_USE ((void*)1) From 01ad5539780b4696b734eec964dbc59bacb2c45d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 00:13:03 -0700 Subject: [PATCH 040/352] set default reset delay to 250ms --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 767a7c35..382dd65e 100644 --- a/src/options.c +++ b/src/options.c @@ -80,7 +80,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed - { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose From dd188529464a8c42c12a686a8d085b2137b049fa Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 00:13:40 -0700 Subject: [PATCH 041/352] reduce page retire cycles --- src/page.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index 3bc146bc..ebd88253 100644 --- a/src/page.c +++ b/src/page.c @@ -394,7 +394,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { } #define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) -#define MI_RETIRE_CYCLES (16) +#define MI_RETIRE_CYCLES (8) // Retire a page with no more used blocks // Important to not retire too quickly though as new @@ -419,7 +419,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = MI_RETIRE_CYCLES; + page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? 
MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; From 1b158d8e805734981a1a8e7ea20aa95f6b5c77e4 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 10:37:07 -0700 Subject: [PATCH 042/352] set max retire size to MAX_MEDIUM_OBJ_SIZE --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 022e2fd1..7c7b5dc1 100644 --- a/src/page.c +++ b/src/page.c @@ -393,7 +393,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } -#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX +#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX #define MI_RETIRE_CYCLES (8) // Retire a page with no more used blocks From 37b43e4cea3e8e07734d8118c11c3950a3bd26b1 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 10:37:33 -0700 Subject: [PATCH 043/352] improved arena cache --- src/arena.c | 258 +++++++++++++++--------------------- src/bitmap.inc.c | 20 ++- test/main-override-static.c | 2 +- 3 files changed, 127 insertions(+), 153 deletions(-) diff --git a/src/arena.c b/src/arena.c index 55f09a60..c12e60a1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,186 +127,146 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_MAX (256) -#define MI_MAX_NUMA (4) - -#define MI_SLOT_IN_USE ((void*)1) +#define MI_CACHE_FIELDS (8) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit typedef struct mi_cache_slot_s { - volatile _Atomic(void*) p; - volatile size_t memid; - volatile mi_msecs_t expire; - volatile bool is_committed; - volatile bool is_large; + void* p; + size_t memid; + mi_msecs_t expire; + bool is_committed; // TODO: use bit from p to reduce size? } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 -static volatile _Atomic(uintptr_t) cache_count[MI_MAX_NUMA]; // = 0 +static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +#define BITS_SET() (UINTPTR_MAX) +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! 
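+// (Descriptive note, for exposition: every cache slot is guarded by one bit in each of the
+//  three bitmaps declared here. In `cache_inuse` a 0-bit means the slot is free for a push;
+//  in `cache_available`/`cache_available_large` a 0-bit means the slot holds a block that a
+//  pop may claim. A push claims a 0-bit in `cache_inuse`, fills the slot, and then clears the
+//  bit in `cache_available` (or `cache_available_large` for large-page memory) to publish it;
+//  a pop claims a 0-bit in one of the available bitmaps, empties the slot, and finally clears
+//  the `cache_inuse` bit so the slot can be reused. Searches start at a bitmap field derived
+//  from the NUMA node and wrap around, so slots for different nodes tend to cluster in
+//  different fields.)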
+static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; +static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -typedef union mi_cache_count_u { - uintptr_t value; - struct { - int16_t count; // at most `count` elements in the cache -#if MI_INTPTR_SIZE > 4 - uint32_t epoch; // each push/pop increase this -#else - uint16_t epoch; -#endif - } x; -} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; - // set numa range - int numa_min = numa_node; - int numa_max = numa_min; - if (numa_node < 0) { - numa_min = 0; - numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; - } - else { - if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - numa_min = numa_max = numa_node; + // numa node determines start field + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // find a free slot - mi_cache_slot_t* slot; - for (int n = numa_min; n <= numa_max; n++) { - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[n]); - int16_t count = top.x.count; - for (int16_t i = count - 1; i >= 0; i--) { - slot = &cache[n][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t,&slot->p); - if (p == NULL) { - if (count > 0) { count = i; } - } - else if (p > MI_SLOT_IN_USE) { // not NULL or 1 - if (count >= 0 && count < top.x.count) { // new lower bound? - mi_cache_count_t newtop = { 0 }; - newtop.x.count = count; - newtop.x.epoch = top.x.epoch + 1; - mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans - } - count = -1; // don't try to set lower bound again - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed - if (!*large && slot->is_large) { - // back out again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again - } - else { - // keep it - *memid = slot->memid; - *large = slot->is_large; - *is_zero = false; - bool committed = slot->is_committed; - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free - if (*commit && !committed) { - bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; - } - else { - *commit = committed; - } - return p; - } - } - } - } + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = false; + if (*large) { // large allowed? 
+ claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; } - return NULL; + if (!claimed) { + claimed = mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_zero = false; + bool committed = slot->is_committed; + slot->p = NULL; + slot->expire = 0; + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; + } + else { + *commit = committed; + } + + // mark the slot as free again + mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; } static void mi_cache_purge(mi_os_tld_t* tld) { - // TODO: for each numa node instead? - // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; - + UNUSED(tld); mi_msecs_t now = _mi_clock_now(); - int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - int purged = 0; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int i = 0; i < top.x.count; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { - mi_msecs_t expire = slot->expire; - if (expire != 0 && now >= expire) { - // expired, try to claim it - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed! 
test again - if (slot->is_committed && !slot->is_large && now >= slot->expire) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; - } - // and unclaim again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); - purged++; - if (purged >= 4) break; // limit to at most 4 decommits per push + size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start + size_t purged = 0; + for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + if (slot->expire != 0 && now >= slot->expire) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + if (slot->expire != 0 && now >= slot->expire) { // safe read + // still expired, decommit it + slot->expire = 0; + mi_assert_internal(slot->is_committed && !slot->is_large); + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; } + mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } + if (purged > 4) break; // bound to no more than 4 purge tries per push } } } - static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { - mi_cache_purge(tld); - // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - // try to add it to the cache + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p == NULL) { // free slot - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { - // claimed! 
- // first try to increase the top bound - mi_cache_count_t newtop = { 0 }; - newtop.x.count = i+1; - newtop.x.epoch = top.x.epoch + 1; - while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - if (top.x.count > newtop.x.count) break; // another push max'd it - newtop.x.epoch = top.x.epoch + 1; // otherwise try again - } - // set the slot - slot->expire = 0; - slot->is_committed = is_committed; - slot->memid = memid; - slot->is_large = is_large; - if (is_committed) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0 && !is_large) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; - } - else { - slot->expire = _mi_clock_now() + delay; - } - } - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; - return true; - } + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_cache_purge(tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; + slot->memid = memid; + slot->expire = 0; + slot->is_committed = is_committed; + if (is_committed && !is_large) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; } } - return false; + + // make it available + mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index c3813a44..a107545f 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -42,6 +42,11 @@ static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx return (idx*MI_BITMAP_FIELD_BITS) + bitidx; } +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) { + return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS); +} + // Get the field index from a bit index. static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { return (bitmap_idx / MI_BITMAP_FIELD_BITS); @@ -177,11 +182,13 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx return false; } - // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. 
-static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { - for (size_t idx = 0; idx < bitmap_fields; idx++) { +static inline bool mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } @@ -189,6 +196,13 @@ static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fi return false; } + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { + return mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); +} + // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { diff --git a/test/main-override-static.c b/test/main-override-static.c index 1ab11385..3ec02bdf 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -184,7 +184,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); - block_overflow1(); + //block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24); From b8846f7a27b7000c826df5abebb3268d58b8f459 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 10:51:10 -0700 Subject: [PATCH 044/352] fix unprotect of guard pages --- include/mimalloc-types.h | 2 +- src/arena.c | 2 +- src/segment.c | 7 ++++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 12a420c2..35bb1502 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -33,7 +33,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. 
(may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode diff --git a/src/arena.c b/src/arena.c index c12e60a1..85d5fd3f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -210,7 +210,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { if (slot->expire != 0 && now >= slot->expire) { // safe read // still expired, decommit it slot->expire = 0; - mi_assert_internal(slot->is_committed && !slot->is_large); + mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); slot->is_committed = false; diff --git a/src/segment.c b/src/segment.c index 6728ef3b..4d9d6809 100644 --- a/src/segment.c +++ b/src/segment.c @@ -273,7 +273,12 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (MI_SECURE>0) { - _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set + // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set + // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted + size_t os_page_size = _mi_os_page_size(); + _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_page_size, os_page_size); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_page_size; + _mi_os_unprotect(end, os_page_size); } // purge delayed decommits now? (no, leave it to the cache) From 66e5484c1c3f7853710e41ba05afc91e8025c4c4 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 11:23:25 -0700 Subject: [PATCH 045/352] fix assertions for huge pages in secure mode --- src/arena.c | 2 +- src/segment.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 85d5fd3f..5215f934 100644 --- a/src/arena.c +++ b/src/arena.c @@ -157,7 +157,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co } // find an available slot - mi_bitmap_index_t bitidx; + mi_bitmap_index_t bitidx = 0; bool claimed = false; if (*large) { // large allowed? 
claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); diff --git a/src/segment.c b/src/segment.c index 4d9d6809..9a59c878 100644 --- a/src/segment.c +++ b/src/segment.c @@ -164,10 +164,12 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } else { // free range of slices; only last slice needs a valid back offset mi_slice_t* last = &segment->slices[maxindex]; - mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); + if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= segment->slice_entries) { + mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); + } mi_assert_internal(slice == last || last->slice_count == 0 ); mi_assert_internal(last->xblock_size == 0 || (segment->kind==MI_SEGMENT_HUGE && last->xblock_size==1)); - if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandonded + if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandoned sq = mi_span_queue_for(slice->slice_count,tld); mi_assert_internal(mi_span_queue_contains(sq,slice)); } @@ -713,6 +715,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); // set up guard pages + size_t guard_slices = 0; if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data @@ -723,6 +726,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ mi_segment_ensure_committed(segment, end, os_page_size, tld->stats); _mi_os_protect(end, os_page_size); if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( + guard_slices = 1; } // reserve first slices for segment info @@ -737,7 +741,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } else { mi_assert_internal(huge_page!=NULL); - *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld); + *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); } mi_assert_expensive(mi_segment_is_valid(segment,tld)); From 18d697a1e6abd4713558bc687677f21ae0e4ba49 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 11:57:33 -0700 Subject: [PATCH 046/352] roll back to old arena cache as it seems to do better on AMD --- src/arena.c | 265 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 155 insertions(+), 110 deletions(-) diff --git a/src/arena.c b/src/arena.c index 5215f934..db30e8fc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -127,146 +127,191 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* /* ----------------------------------------------------------- Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_FIELDS (8) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit + + +/* ----------------------------------------------------------- + Arena cache +----------------------------------------------------------- */ +#define MI_CACHE_MAX (256) +#define MI_MAX_NUMA (4) + +#define MI_SLOT_IN_USE ((void*)1) typedef struct mi_cache_slot_s { - void* p; - size_t memid; - mi_msecs_t expire; - bool is_committed; // TODO: use bit from p to reduce size? 
+ volatile _Atomic(void*)p; + volatile size_t memid; + volatile mi_msecs_t expire; + volatile bool is_committed; + volatile bool is_large; } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 - -#define BITS_SET() (UINTPTR_MAX) -static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! -static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; -static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free +static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 +static volatile _Atomic(uintptr_t)cache_count[MI_MAX_NUMA]; // = 0 +typedef union mi_cache_count_u { + uintptr_t value; + struct { + int16_t count; // at most `count` elements in the cache +#if MI_INTPTR_SIZE > 4 + uint32_t epoch; // each push/pop increase this +#else + uint16_t epoch; +#endif + } x; +} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; - // numa node determines start field - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - if (*large) { // large allowed? - claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_zero = false; - bool committed = slot->is_committed; - slot->p = NULL; - slot->expire = 0; - if (*commit && !committed) { - bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; + // set numa range + int numa_min = numa_node; + int numa_max = numa_min; + if (numa_node < 0) { + numa_min = 0; + numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; } else { - *commit = committed; + if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + numa_min = numa_max = numa_node; } - // mark the slot as free again - mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; + // find a free slot + mi_cache_slot_t* slot; + for (int n = numa_min; n <= numa_max; n++) { + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[n]); + int16_t count = top.x.count; + for (int16_t i = count - 1; i >= 0; i--) { + slot = &cache[n][i]; + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); + if (p == NULL) { + if (count > 0) { count = i; } + } + else if (p > MI_SLOT_IN_USE) { // not NULL or 1 + if (count >= 0 && count < top.x.count) { // new lower bound? 
+ mi_cache_count_t newtop = { 0 }; + newtop.x.count = count; + newtop.x.epoch = top.x.epoch + 1; + mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans + } + count = -1; // don't try to set lower bound again + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { + // claimed + if (!*large && slot->is_large) { + // back out again + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again + } + else { + // keep it + *memid = slot->memid; + *large = slot->is_large; + *is_zero = false; + bool committed = slot->is_committed; + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; + } + else { + *commit = committed; + } + return p; + } + } + } + } + } + return NULL; } static void mi_cache_purge(mi_os_tld_t* tld) { - UNUSED(tld); + // TODO: for each numa node instead? + // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; + mi_msecs_t now = _mi_clock_now(); - size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start - size_t purged = 0; - for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - if (slot->expire != 0 && now >= slot->expire) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it - if (slot->expire != 0 && now >= slot->expire) { // safe read - // still expired, decommit it - slot->expire = 0; - mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; + int numa_node = _mi_os_numa_node(NULL); + if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + mi_cache_slot_t* slot; + int purged = 0; + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + for (int i = 0; i < top.x.count; i++) { + slot = &cache[numa_node][i]; + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); + if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { + mi_msecs_t expire = slot->expire; + if (expire != 0 && now >= expire) { + // expired, try to claim it + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { + // claimed! 
test again + if (slot->is_committed && !slot->is_large && now >= slot->expire) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; + } + // and unclaim again + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); + purged++; + if (purged >= 4) break; // limit to at most 4 decommits per push } - mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } - if (purged > 4) break; // bound to no more than 4 purge tries per push } } } -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) -{ - // only for segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - // purge expired entries +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) +{ mi_cache_purge(tld); - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; + // only for segment blocks + if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - mi_assert_internal(mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - slot->expire = 0; - slot->is_committed = is_committed; - if (is_committed && !is_large) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; - } - else { - slot->expire = _mi_clock_now() + delay; + // try to add it to the cache + int numa_node = _mi_os_numa_node(NULL); + if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; + mi_cache_slot_t* slot; + mi_cache_count_t top = { 0 }; + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { + slot = &cache[numa_node][i]; + void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); + if (p == NULL) { // free slot + if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { + // claimed! 
+ // first try to increase the top bound + mi_cache_count_t newtop = { 0 }; + newtop.x.count = i+1; + newtop.x.epoch = top.x.epoch + 1; + while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { + top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); + if (top.x.count > newtop.x.count) break; // another push max'd it + newtop.x.epoch = top.x.epoch + 1; // otherwise try again + } + // set the slot + slot->expire = 0; + slot->is_committed = is_committed; + slot->memid = memid; + slot->is_large = is_large; + if (is_committed) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0 && !is_large) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; + } + } + mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; + return true; + } } } - - // make it available - mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); - return true; + return false; } From 69158f2c76ef3ce2b61c8f0326ce02d451cc9c5d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 12:04:36 -0700 Subject: [PATCH 047/352] roll back again to new arena cache: previous perf regression was caused due to accidentally making secure mode default --- include/mimalloc-types.h | 2 +- src/arena.c | 266 ++++++++++++++++----------------------- 2 files changed, 112 insertions(+), 156 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 35bb1502..12a420c2 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -33,7 +33,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode diff --git a/src/arena.c b/src/arena.c index db30e8fc..b946ae64 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,190 +128,146 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena cache ----------------------------------------------------------- */ - -/* ----------------------------------------------------------- - Arena cache ------------------------------------------------------------ */ -#define MI_CACHE_MAX (256) -#define MI_MAX_NUMA (4) - -#define MI_SLOT_IN_USE ((void*)1) +#define MI_CACHE_FIELDS (8) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit typedef struct mi_cache_slot_s { - volatile _Atomic(void*)p; - volatile size_t memid; - volatile mi_msecs_t expire; - volatile bool is_committed; - volatile bool is_large; + void* p; + size_t memid; + mi_msecs_t expire; + bool is_committed; // TODO: use bit from p to reduce size? } mi_cache_slot_t; -static mi_cache_slot_t cache[MI_MAX_NUMA][MI_CACHE_MAX]; // = 0 -static volatile _Atomic(uintptr_t)cache_count[MI_MAX_NUMA]; // = 0 +static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +#define BITS_SET() (UINTPTR_MAX) +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! 
+static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; +static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -typedef union mi_cache_count_u { - uintptr_t value; - struct { - int16_t count; // at most `count` elements in the cache -#if MI_INTPTR_SIZE > 4 - uint32_t epoch; // each push/pop increase this -#else - uint16_t epoch; -#endif - } x; -} mi_cache_count_t; static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; - // set numa range - int numa_min = numa_node; - int numa_max = numa_min; - if (numa_node < 0) { - numa_min = 0; - numa_max = _mi_os_numa_node_count() % MI_MAX_NUMA; - } - else { - if (numa_node >= MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - numa_min = numa_max = numa_node; + // numa node determines start field + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // find a free slot - mi_cache_slot_t* slot; - for (int n = numa_min; n <= numa_max; n++) { - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[n]); - int16_t count = top.x.count; - for (int16_t i = count - 1; i >= 0; i--) { - slot = &cache[n][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p == NULL) { - if (count > 0) { count = i; } - } - else if (p > MI_SLOT_IN_USE) { // not NULL or 1 - if (count >= 0 && count < top.x.count) { // new lower bound? - mi_cache_count_t newtop = { 0 }; - newtop.x.count = count; - newtop.x.epoch = top.x.epoch + 1; - mi_atomic_cas_strong(&cache_count[n], newtop.value, top.value); // it's fine to not succeed; just causes longer scans - } - count = -1; // don't try to set lower bound again - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed - if (!*large && slot->is_large) { - // back out again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); // make it available again - } - else { - // keep it - *memid = slot->memid; - *large = slot->is_large; - *is_zero = false; - bool committed = slot->is_committed; - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, NULL); // set it free - if (*commit && !committed) { - bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; - } - else { - *commit = committed; - } - return p; - } - } - } - } + // find an available slot + mi_bitmap_index_t bitidx = 0; + bool claimed = false; + if (*large) { // large allowed? 
+ claimed = mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; } - return NULL; + if (!claimed) { + claimed = mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_zero = false; + bool committed = slot->is_committed; + slot->p = NULL; + slot->expire = 0; + if (*commit && !committed) { + bool commit_zero; + _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); + *commit = true; + } + else { + *commit = committed; + } + + // mark the slot as free again + mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; } static void mi_cache_purge(mi_os_tld_t* tld) { - // TODO: for each numa node instead? - // if (mi_option_get(mi_option_arena_reset_delay) == 0) return; - + UNUSED(tld); mi_msecs_t now = _mi_clock_now(); - int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - int purged = 0; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int i = 0; i < top.x.count; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p > MI_SLOT_IN_USE && !slot->is_committed && !slot->is_large) { - mi_msecs_t expire = slot->expire; - if (expire != 0 && now >= expire) { - // expired, try to claim it - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) { - // claimed! 
test again - if (slot->is_committed && !slot->is_large && now >= slot->expire) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; - } - // and unclaim again - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, p); - purged++; - if (purged >= 4) break; // limit to at most 4 decommits per push + size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start + size_t purged = 0; + for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + if (slot->expire != 0 && now >= slot->expire) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + if (slot->expire != 0 && now >= slot->expire) { // safe read + // still expired, decommit it + slot->expire = 0; + mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); + slot->is_committed = false; } + mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } + if (purged > 4) break; // bound to no more than 4 purge tries per push } } } - -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) +static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { - mi_cache_purge(tld); - // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // try to add it to the cache + + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); - if (numa_node > MI_MAX_NUMA) numa_node %= MI_MAX_NUMA; - mi_cache_slot_t* slot; - mi_cache_count_t top = { 0 }; - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - for (int16_t i = top.x.count; i < MI_CACHE_MAX; i++) { - slot = &cache[numa_node][i]; - void* p = mi_atomic_read_ptr_relaxed(mi_cache_slot_t, &slot->p); - if (p == NULL) { // free slot - if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, NULL)) { - // claimed! 
- // first try to increase the top bound - mi_cache_count_t newtop = { 0 }; - newtop.x.count = i+1; - newtop.x.epoch = top.x.epoch + 1; - while (!mi_atomic_cas_strong(&cache_count[numa_node], newtop.value, top.value)) { - top.value = mi_atomic_read_relaxed(&cache_count[numa_node]); - if (top.x.count > newtop.x.count) break; // another push max'd it - newtop.x.epoch = top.x.epoch + 1; // otherwise try again - } - // set the slot - slot->expire = 0; - slot->is_committed = is_committed; - slot->memid = memid; - slot->is_large = is_large; - if (is_committed) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0 && !is_large) { - _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; - } - else { - slot->expire = _mi_clock_now() + delay; - } - } - mi_atomic_write_ptr(mi_cache_slot_t, &slot->p, start); // and make it available; - return true; - } + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_cache_purge(tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; + slot->memid = memid; + slot->expire = 0; + slot->is_committed = is_committed; + if (is_committed && !is_large) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + _mi_os_decommit(start, size, tld->stats); + slot->is_committed = false; + } + else { + slot->expire = _mi_clock_now() + delay; } } - return false; + + // make it available + mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; } From a4b7baf6fd745d40883b5be9017105e318b3afd3 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 2 May 2020 18:08:31 -0700 Subject: [PATCH 048/352] Update readme with descriptions of secure and debug mode --- readme.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/readme.md b/readme.md index 583d54ed..fd600763 100644 --- a/readme.md +++ b/readme.md @@ -255,6 +255,32 @@ OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 +## Secure Mode + +_mimalloc_ can be build in secure mode by using the `-DMI_SECURE=ON` flags in `cmake`. This build enables various mitigations +to make mimalloc more robust against exploits. 
In particular: + +- All internal mimalloc pages are surrounded by guard pages and the heap metadata is behind a guard page as well (so a buffer overflow + exploit cannot reach into the metadata), +- All free list pointers are + [encoded](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) + with per-page keys which is used both to prevent overwrites with a known pointer, as well as to detect heap corruption, +- Double free's are detected (and ignored), +- The free lists are initialized in a random order and allocation randomly chooses between extension and reuse within a page to + mitigate against attacks that rely on a predicable allocation order. Similarly, the larger heap blocks allocated by mimalloc + from the OS are also address randomized. + +As always, evaluate with care as part of an overall security strategy as all of the above are mitigations but not guarantees. + +## Debug Mode + +When _mimalloc_ is built using debug mode, various checks are done at runtime to catch development errors. + +- Statistics are maintained in detail for each object size. They can be shown using `MIMALLOC_SHOW_STATS=1` at runtime. +- All objects have padding at the end to detect (byte precise) heap block overflows. +- Double free's, and freeing invalid heap pointers are detected. +- Corrupted free-lists and some forms of use-after-free are detected. + # Overriding Malloc From ea92fb2fe4c21118eac145615dfade37ec22fc92 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 21:40:14 -0700 Subject: [PATCH 049/352] lower arena reset delay --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 382dd65e..1616e9a6 100644 --- a/src/options.c +++ b/src/options.c @@ -81,7 +81,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds + { 250, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output From e5b72cdfe7dfcb495a5a2a6e4a0a0bbf0b9d8058 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 May 2020 22:22:35 -0700 Subject: [PATCH 050/352] reduce segment size and increase cache --- include/mimalloc-types.h | 2 +- src/arena.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 12a420c2..211ecfec 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -100,7 +100,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_SEGMENT_SHIFT ( 8 + MI_SEGMENT_SLICE_SHIFT) // 16mb +#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 8mb #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb diff --git a/src/arena.c b/src/arena.c index b946ae64..351d9fb5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,8 +128,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_FIELDS (8) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 512 on 64-bit +#define MI_CACHE_FIELDS (16) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit typedef struct mi_cache_slot_s { void* p; @@ -141,8 +141,8 @@ typedef struct mi_cache_slot_s { static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 #define BITS_SET() (UINTPTR_MAX) -static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; // zero bit = available! -static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT8(BITS_SET) }; +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT16(BITS_SET) }; // zero bit = available! +static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT16(BITS_SET) }; static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free From 28f4f1ce04b1438824429dc37b749d9a0ca42005 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 May 2020 10:45:46 -0700 Subject: [PATCH 051/352] nice cache initialization --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 351d9fb5..87474bcd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,8 +128,9 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena cache ----------------------------------------------------------- */ -#define MI_CACHE_FIELDS (16) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit +#define MI_CACHE_FIELDS (16) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit +#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) typedef struct mi_cache_slot_s { void* p; @@ -141,8 +142,8 @@ typedef struct mi_cache_slot_s { static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 #define BITS_SET() (UINTPTR_MAX) -static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_INIT16(BITS_SET) }; // zero bit = available! -static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_INIT16(BITS_SET) }; +static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! 
+static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free From 30799bce73d5abbbf088ec5804758f2adf7d5323 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 May 2020 11:42:38 -0700 Subject: [PATCH 052/352] fix assertion for huge segments --- src/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index 9a59c878..83f6c3de 100644 --- a/src/segment.c +++ b/src/segment.c @@ -164,7 +164,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } else { // free range of slices; only last slice needs a valid back offset mi_slice_t* last = &segment->slices[maxindex]; - if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= segment->slice_entries) { + if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= (segment->slice_entries - segment->segment_info_slices)) { mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); } mi_assert_internal(slice == last || last->slice_count == 0 ); @@ -424,7 +424,7 @@ static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ } static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (!segment->allow_decommit) return; // TODO: check option_decommit? + if (!segment->allow_decommit) return; if (segment->commit_mask == 1) return; // fully decommitted if (mi_option_get(mi_option_reset_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); @@ -698,7 +698,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!commit_info_still_good) { segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed - segment->allow_decommit = mi_option_is_enabled(mi_option_allow_decommit); + segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_fixed); segment->decommit_expire = 0; segment->decommit_mask = 0; } From cce998a835a180715f52f0ddfc62f9877923e7ec Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 May 2020 11:42:49 -0700 Subject: [PATCH 053/352] fix assertion for huge blocks --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 7c7b5dc1..eb736fdb 100644 --- a/src/page.c +++ b/src/page.c @@ -783,7 +783,7 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = (is_huge ? 
NULL : mi_page_queue(heap, block_size)); mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); if (page != NULL) { - const size_t bsize = mi_page_usable_block_size(page); + const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(bsize >= size); From 74ea69b7847debd1e5e78909234bc3795fa36c4b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 May 2020 16:33:29 -0700 Subject: [PATCH 054/352] increase default arena reset delay (behaves better on 36+ core systems) --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 1616e9a6..c87492fd 100644 --- a/src/options.c +++ b/src/options.c @@ -81,7 +81,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 250, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds + { 500, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output From 82e29f47b38b11c7fefac98882e90cfa9cb5b80d Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 18 May 2020 18:51:06 -0700 Subject: [PATCH 055/352] weaken assertion, #245 --- src/alloc-aligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 7eeb9e92..45c7167a 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -54,7 +54,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t // .. and align within the allocation uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); - mi_assert_internal(adjust % sizeof(uintptr_t) == 0); + mi_assert_internal(adjust >= alignment); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); From e4ddc750697763613aa8c2e7862905489451a657 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 28 Aug 2020 08:46:51 -0700 Subject: [PATCH 056/352] set delayed decommit mask more precisely to only decommit currently committed blocks --- include/mimalloc-types.h | 2 +- src/segment.c | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 211ecfec..01e087b9 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -264,7 +264,7 @@ typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; -// Segments are large allocated memory blocks (2mb on 64 bit) from +// Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. 
typedef struct mi_segment_s { diff --git a/src/segment.c b/src/segment.c index 66c0be04..72df9c70 100644 --- a/src/segment.c +++ b/src/segment.c @@ -137,6 +137,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); + mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -414,8 +415,9 @@ static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (commit && (segment->decommit_mask & mask) != 0) { segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } - // always undo delayed decommits - segment->decommit_mask &= ~mask; + // always undo delayed decommits + segment->decommit_mask &= ~mask; + mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); } static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { @@ -425,19 +427,19 @@ static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_decommit) return; - if (segment->commit_mask == 1) return; // fully decommitted + if (segment->commit_mask == 1) return; // fully decommitted (1 = the initial segment metadata span) if (mi_option_get(mi_option_reset_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } else { - // create mask + // register for future decommit in the decommit mask uint8_t* start; size_t full_size; uintptr_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); if (mask==0 || full_size==0) return; // update delayed commit - segment->decommit_mask |= mask; + segment->decommit_mask |= (mask & segment->commit_mask); // only decommit what is committed; span_free may try to decommit more segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } } From c1a834e8865d4b05f931cea85f8a65c3fc48a0a7 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 28 Aug 2020 10:40:46 -0700 Subject: [PATCH 057/352] add checks for when memory commit fails to return NULL --- src/segment.c | 79 ++++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/src/segment.c b/src/segment.c index 72df9c70..42919851 100644 --- a/src/segment.c +++ b/src/segment.c @@ -196,39 +196,19 @@ static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } +static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t* page_size) +{ + ptrdiff_t idx = slice - segment->slices; + size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + if (page_size != NULL) *page_size = psize; + return (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); +} + // Start of the page available memory; can be used on uninitialized pages uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { const mi_slice_t* slice = 
mi_page_to_slice((mi_page_t*)page); - ptrdiff_t idx = slice - segment->slices; - size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; - uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); - /* - if (idx == 0) { - // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; - psize -= segment->segment_info_size; - - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - // to ensure this, we over-estimate and align with the OS page size - const size_t asize = _mi_os_page_size(); - uint8_t* q = (uint8_t*)_mi_align_up((uintptr_t)p, _mi_os_page_size()); - if (p < q) { - psize -= (q - p); - p = q; - } - mi_assert_internal((uintptr_t)p % _mi_os_page_size() == 0); - } - */ - /* TODO: guard pages between every slice span - if (MI_SECURE > 1 || (MI_SECURE == 1 && slice == &segment->slices[segment->slice_entries - 1])) { - // secure == 1: the last page has an os guard page at the end - // secure > 1: every page has an os guard page - psize -= _mi_os_page_size(); - } - */ - - if (page_size != NULL) *page_size = psize; + uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page_size); mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; @@ -394,21 +374,21 @@ static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative return mask; } -static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { +static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { // commit liberal, but decommit conservative uint8_t* start; size_t full_size; uintptr_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); - if (mask==0 || full_size==0) return; + if (mask==0 || full_size==0) return true; if (commit && (segment->commit_mask & mask) != mask) { bool is_zero = false; - _mi_os_commit(start,full_size,&is_zero,stats); + if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; segment->commit_mask |= mask; } else if (!commit && (segment->commit_mask & mask) != 0) { mi_assert_internal((void*)start != (void*)segment); - _mi_os_decommit(start, full_size, stats); + _mi_os_decommit(start, full_size, stats); // ok if this fails segment->commit_mask &= ~mask; } // increase expiration of reusing part of the delayed decommit @@ -418,11 +398,12 @@ static void mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s // always undo delayed decommits segment->decommit_mask &= ~mask; mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); + return true; } -static void mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (~segment->commit_mask == 0 && segment->decommit_mask==0) return; // fully committed - mi_segment_commitx(segment,true,p,size,stats); +static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (~segment->commit_mask == 0 && segment->decommit_mask==0) return true; // fully committed + return mi_segment_commitx(segment,true,p,size,stats); } static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { @@ -580,11 +561,18 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz slice->slice_count = (uint32_t)slice_count; } - 
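(An illustrative summary, not part of patch 057: it condenses how a failed OS commit now propagates out of the allocation path, using the functions changed in this hunk and the next; error-handling details are elided.)

  // _mi_os_commit fails
  //   -> mi_segment_commitx returns false
  //   -> mi_segment_ensure_committed returns false
  //   -> mi_segment_span_allocate returns NULL (it commits before touching any slice data)
  //   -> mi_segments_page_find_and_allocate coalesces the span back and returns NULL to its caller
  if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, NULL),
                                   slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) {
    return NULL;  // commit failed; every caller must now handle a NULL page
  }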
+// Note: may still return NULL if committing the memory failed static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_slice_t* slice = &segment->slices[slice_index]; mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); + + // commit before changing the slice data + if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) { + return NULL; // commit failed! + } + + // convert the slices to a page slice->slice_offset = 0; slice->slice_count = (uint32_t)slice_count; mi_assert_internal(slice->slice_count == slice_count); @@ -611,9 +599,8 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i last->slice_count = 0; last->xblock_size = 1; } - - // ensure the memory is committed - mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + + // and initialize the page page->is_reset = false; page->is_committed = true; segment->used++; @@ -635,7 +622,13 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm mi_segment_slice_split(segment, slice, slice_count, tld); } mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); - return mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); + mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); + if (page == NULL) { + // commit failed; return NULL but first restore the slice + mi_segment_span_free_coalesce(slice, tld); + return NULL; + } + return page; } } sq++; @@ -732,7 +725,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } // reserve first slices for segment info - mi_segment_span_allocate(segment, 0, info_slices, tld); + mi_page_t* page0 = mi_segment_span_allocate(segment, 0, info_slices, tld); + mi_assert_internal(page0!=NULL); if (page0==NULL) return NULL; // cannot fail as we always commit in advance mi_assert_internal(segment->used == 1); segment->used = 0; // don't count our internal slices towards usage @@ -744,6 +738,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ else { mi_assert_internal(huge_page!=NULL); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); + mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance } mi_assert_expensive(mi_segment_is_valid(segment,tld)); From 228b5f6e9df525bfa22c808dce28653f58f92b1c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Sep 2020 12:19:04 -0700 Subject: [PATCH 058/352] use atomic load for segment map --- src/segment.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/segment.c b/src/segment.c index 2c559f04..c9fb364e 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1392,7 +1392,7 @@ static void mi_segment_map_allocated_at(const mi_segment_t* segment) { size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); if (index==0) return; - uintptr_t mask = mi_segment_map[index]; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { newmask = (mask | ((uintptr_t)1 << bitidx)); @@ -1404,7 +1404,7 @@ static void mi_segment_map_freed_at(const mi_segment_t* 
segment) { size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); if (index == 0) return; - uintptr_t mask = mi_segment_map[index]; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { newmask = (mask & ~((uintptr_t)1 << bitidx)); @@ -1417,7 +1417,8 @@ static mi_segment_t* _mi_segment_of(const void* p) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge - if (mi_likely((mi_segment_map[index] & ((uintptr_t)1 << bitidx)) != 0)) { + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { return segment; // yes, allocated by us } if (index==0) return NULL; @@ -1427,16 +1428,17 @@ static mi_segment_t* _mi_segment_of(const void* p) { // note: we could maintain a lowest index to speed up the path for invalid pointers? size_t lobitidx; size_t loindex; - uintptr_t lobits = mi_segment_map[index] & (((uintptr_t)1 << bitidx) - 1); + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); if (lobits != 0) { loindex = index; lobitidx = _mi_bsr(lobits); } else { + uintptr_t lomask = mask; loindex = index - 1; - while (loindex > 0 && mi_segment_map[loindex] == 0) loindex--; + while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; if (loindex==0) return NULL; - lobitidx = _mi_bsr(mi_segment_map[loindex]); + lobitidx = _mi_bsr(lomask); } // take difference as the addresses could be larger than the MAX_ADDRESS space. size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; From 7058e501cbdfc181a69456643915e4d0718fff0e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 3 Sep 2020 13:53:56 -0700 Subject: [PATCH 059/352] use atomic ops for the expire field; passes TSAN now --- src/arena.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8f4e1783..959d59c5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -133,10 +133,10 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* #define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) typedef struct mi_cache_slot_s { - void* p; - size_t memid; - mi_msecs_t expire; - bool is_committed; // TODO: use bit from p to reduce size? + void* p; + size_t memid; + _Atomic(mi_msecs_t) expire; + bool is_committed; // TODO: use bit from p to reduce size? 
} mi_cache_slot_t; static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 @@ -179,7 +179,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co *is_zero = false; bool committed = slot->is_committed; slot->p = NULL; - slot->expire = 0; + mi_atomic_store_release(&slot->expire,0); if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); @@ -203,15 +203,17 @@ static void mi_cache_purge(mi_os_tld_t* tld) { for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; - if (slot->expire != 0 && now >= slot->expire) { // racy read + mi_msecs_t expire = mi_atomic_load_relaxed(&slot->expire); + if (expire != 0 && now >= expire) { // racy read // seems expired, first claim it from available purged++; mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // was available, we claimed it - if (slot->expire != 0 && now >= slot->expire) { // safe read + expire = mi_atomic_load_acquire(&slot->expire); + if (expire != 0 && now >= expire) { // safe read // still expired, decommit it - slot->expire = 0; + mi_atomic_store_relaxed(&slot->expire,0); mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); @@ -252,7 +254,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; slot->p = start; slot->memid = memid; - slot->expire = 0; + mi_atomic_store_relaxed(&slot->expire,0); slot->is_committed = is_committed; if (is_committed && !is_large) { long delay = mi_option_get(mi_option_arena_reset_delay); @@ -262,7 +264,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit slot->is_committed = false; } else { - slot->expire = _mi_clock_now() + delay; + mi_atomic_store_release(&slot->expire, _mi_clock_now() + delay); } } From f6109765d822d686fa94466ddf26480f78d4a40c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Sep 2020 15:04:40 -0700 Subject: [PATCH 060/352] update whitespace and comments --- include/mimalloc-types.h | 2 +- src/arena.c | 8 ++++---- src/region.c | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 931d3270..6af46d18 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -122,7 +122,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // 64kb on 64-bit -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32mb on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 4mb on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) diff --git a/src/arena.c b/src/arena.c index 959d59c5..6a15c83d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +large blocks (>= MI_ARENA_BLOCK_SIZE, 8MiB). In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. @@ -55,9 +55,9 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // size in count of arena blocks. typedef uintptr_t mi_block_info_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_ALIGN // 64MiB -#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 4GiB -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB +#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_ALIGN // 8MiB +#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 512MiB +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor diff --git a/src/region.c b/src/region.c index e916e452..db2871d6 100644 --- a/src/region.c +++ b/src/region.c @@ -88,12 +88,12 @@ typedef union mi_region_info_u { typedef struct mem_region_s { _Atomic(uintptr_t) info; // mi_region_info_t.value _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block + mi_bitmap_field_t in_use; // bit per in-use block + mi_bitmap_field_t dirty; // track if non-zero per block + mi_bitmap_field_t commit; // track if committed per block + mi_bitmap_field_t reset; // track if reset per block _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena - uintptr_t padding; // round to 8 fields + uintptr_t padding; // round to 8 fields } mem_region_t; // The region map From b22401deb3e97b24952c5b6191af79fa11733246 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Sep 2020 20:31:11 -0700 Subject: [PATCH 061/352] layout --- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 6a15c83d..c56c7b8e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -69,10 +69,10 @@ typedef struct mi_arena_s { bool is_zero_init; // is the arena zero initialized? bool is_committed; // is the memory committed bool is_large; // large OS page allocated - _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? 
- mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) + mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; From dc858f6d29c6288f49f2f26bd14a220a617b3a4c Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 09:23:22 -0700 Subject: [PATCH 062/352] fix c++ compilation with new atomics for dev-slice --- ide/vs2017/mimalloc-override.vcxproj | 8 -------- src/arena.c | 6 +++--- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 50a950b9..e48aa4d8 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -95,11 +95,7 @@ true true ../../include -<<<<<<< HEAD - MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); -======= _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); ->>>>>>> dev MultiThreadedDebugDLL false Default @@ -127,11 +123,7 @@ true true ../../include -<<<<<<< HEAD - MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); -======= _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); ->>>>>>> dev MultiThreadedDebugDLL false Default diff --git a/src/arena.c b/src/arena.c index c56c7b8e..731d4e20 100644 --- a/src/arena.c +++ b/src/arena.c @@ -179,7 +179,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co *is_zero = false; bool committed = slot->is_committed; slot->p = NULL; - mi_atomic_store_release(&slot->expire,0); + mi_atomic_store_release(&slot->expire,(mi_msecs_t)0); if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); @@ -213,7 +213,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { expire = mi_atomic_load_acquire(&slot->expire); if (expire != 0 && now >= expire) { // safe read // still expired, decommit it - mi_atomic_store_relaxed(&slot->expire,0); + mi_atomic_store_relaxed(&slot->expire,(mi_msecs_t)0); mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); @@ -254,7 +254,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; slot->p = start; slot->memid = memid; - mi_atomic_store_relaxed(&slot->expire,0); + mi_atomic_store_relaxed(&slot->expire,(mi_msecs_t)0); slot->is_committed = is_committed; if (is_committed && !is_large) { long delay = mi_option_get(mi_option_arena_reset_delay); From 4df01218e2f136d5eaaa443023331902dda51da5 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 10:03:37 -0700 Subject: [PATCH 063/352] fix msvc compilation with new atomics --- include/mimalloc-atomic.h | 34 ++++++++++++++++++++++++++++++++++ include/mimalloc-types.h | 1 - src/arena.c | 12 ++++++------ test/main-override-static.c | 3 +++ 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index e6f4ba0d..bb9430b0 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -106,6 +106,13 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* 
nothing */ }; } +// Used by timers +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) + + #elif defined(_MSC_VER) @@ -189,6 +196,27 @@ static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_atomic_exchange_explicit(p,x,mo); #endif } +static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)* p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_X64) + return *p; +#else + int64_t old = *p; + int64_t x = old; + while ((old = InterlockedCompareExchange64(p, x, old)) != x) { + x = old; + } + return x; +#endif +} +static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)* p, int64_t x, mi_memory_order mo) { + (void)(mo); +#if defined(x_M_IX86) || defined(_M_X64) + *p = x; +#else + InterlockedExchange64(p,x); +#endif +} // These are used by the statistics static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) { @@ -222,6 +250,12 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) +#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed)) + + #endif diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d482240b..7270f798 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -280,7 +280,6 @@ typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; - // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. 
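(A usage sketch, not part of patch 063: it shows the intent of the new `mi_atomic_loadi64_*`/`mi_atomic_storei64_*` helpers using the cache-slot expiration pattern from the arena.c hunk that follows; the surrounding bitmap claiming is elided.)

  // field: _Atomic(mi_msecs_t) expire;  -- written with release, read relaxed for probing
  mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);   // cheap racy pre-check
  if (expire != 0 && now >= expire) {
    // ... claim the slot in the `cache_available` bitmap first ...
    expire = mi_atomic_loadi64_acquire(&slot->expire);            // safe re-read after claiming
    if (expire != 0 && now >= expire) {
      mi_atomic_storei64_relaxed(&slot->expire, (mi_msecs_t)0);   // mark the slot as handled
      // ... decommit the cached block ...
    }
  }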
diff --git a/src/arena.c b/src/arena.c index 731d4e20..c7f38c2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -179,7 +179,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co *is_zero = false; bool committed = slot->is_committed; slot->p = NULL; - mi_atomic_store_release(&slot->expire,(mi_msecs_t)0); + mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); if (*commit && !committed) { bool commit_zero; _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); @@ -203,17 +203,17 @@ static void mi_cache_purge(mi_os_tld_t* tld) { for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_load_relaxed(&slot->expire); + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); if (expire != 0 && now >= expire) { // racy read // seems expired, first claim it from available purged++; mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); if (mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // was available, we claimed it - expire = mi_atomic_load_acquire(&slot->expire); + expire = mi_atomic_loadi64_acquire(&slot->expire); if (expire != 0 && now >= expire) { // safe read // still expired, decommit it - mi_atomic_store_relaxed(&slot->expire,(mi_msecs_t)0); + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); @@ -254,7 +254,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; slot->p = start; slot->memid = memid; - mi_atomic_store_relaxed(&slot->expire,(mi_msecs_t)0); + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->is_committed = is_committed; if (is_committed && !is_large) { long delay = mi_option_get(mi_option_arena_reset_delay); @@ -264,7 +264,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit slot->is_committed = false; } else { - mi_atomic_store_release(&slot->expire, _mi_clock_now() + delay); + mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); } } diff --git a/test/main-override-static.c b/test/main-override-static.c index ca65a0b2..0067be04 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -49,6 +49,7 @@ static inline uint8_t mi_bsr32(uint32_t x) { } #endif +/* // Bit scan reverse: return the index of the highest bit. 
uint8_t _mi_bsr(uintptr_t x) { if (x == 0) return 0; @@ -61,6 +62,8 @@ uint8_t _mi_bsr(uintptr_t x) { # error "define bsr for non-32 or 64-bit platforms" #endif } +*/ + static inline size_t _mi_wsize_from_size(size_t size) { return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); From 13bbb78907cb7276b729c36671602ccdf97dcf94 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 11:48:23 -0700 Subject: [PATCH 064/352] add dev-slice to azure test pipeline --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 68c150df..d0e27ffd 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,6 +6,7 @@ trigger: - master - dev +- dev-slice jobs: - job: From 3d708aa7e1f9801e1a860a9d00eef5bd8492d95e Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:16:46 -0700 Subject: [PATCH 065/352] fix warning in g++ --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index c7f38c2c..f946bfa9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -141,7 +141,7 @@ typedef struct mi_cache_slot_s { static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 -#define BITS_SET() (UINTPTR_MAX) +#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) static mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free From 85a8c138fcbb861dd947aa2bfc905a4cdfcc663f Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:18:09 -0700 Subject: [PATCH 066/352] enable verbose ctest on mac pipeline --- azure-pipelines.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d0e27ffd..48c8da98 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -126,7 +126,11 @@ jobs: cmakeArgs: .. 
$(cmakeExtraArgs) - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) displayName: Make - - script: make test -C $(BuildType) + #- script: make test -C $(BuildType) + # displayName: CTest + - script: | + cd $(BuildType) + ctest --verbose --timeout 120 displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From a0370f347cddfb4f39a43bca81f83edc8e01d023 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:20:21 -0700 Subject: [PATCH 067/352] more verbose ctest on mac pipeline --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 48c8da98..a82e6c32 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -130,6 +130,7 @@ jobs: # displayName: CTest - script: | cd $(BuildType) + export MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) From 0c5f03559d476b48467c1b0796cc4792dfe88f51 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:22:52 -0700 Subject: [PATCH 068/352] fix verbose ctest on mac pipeline --- azure-pipelines.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a82e6c32..17961c62 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -130,8 +130,7 @@ jobs: # displayName: CTest - script: | cd $(BuildType) - export MIMALLOC_VERBOSE=1 - ctest --verbose --timeout 120 + MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 5fe80671a24327b5b6f12fa8b521f95bba4400a6 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:26:47 -0700 Subject: [PATCH 069/352] again try to fix verbose ctest on mac pipeline --- azure-pipelines.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 17961c62..7fc80a5e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -128,9 +128,8 @@ jobs: displayName: Make #- script: make test -C $(BuildType) # displayName: CTest - - script: | - cd $(BuildType) - MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 + - script: MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 + workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 7a08ca4dc6b687fa2b9351aa338a6aba43449861 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:30:13 -0700 Subject: [PATCH 070/352] again try to fix verbose ctest on mac pipeline --- azure-pipelines.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7fc80a5e..56f00790 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -128,8 +128,11 @@ jobs: displayName: Make #- script: make test -C $(BuildType) # displayName: CTest - - script: MIMALLOC_VERBOSE=1 ctest --verbose --timeout 120 + - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress workingDirectory: $(BuildType) - displayName: CTest + displayName: TestStress + - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api + workingDirectory: $(BuildType) + displayName: TestAPI # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 8834fe02da49b42b31fe2dd2d5e02a90dfc7ad14 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 12:31:28 -0700 Subject: [PATCH 071/352] again try to fix verbose ctest on mac pipeline --- 
azure-pipelines.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 56f00790..b0933cd3 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -128,11 +128,11 @@ jobs: displayName: Make #- script: make test -C $(BuildType) # displayName: CTest - - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress - workingDirectory: $(BuildType) - displayName: TestStress - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api workingDirectory: $(BuildType) displayName: TestAPI + - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress + workingDirectory: $(BuildType) + displayName: TestStress # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 63a9f45ba64e44162fa0de41030e0d012c5bcbba Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 19:39:10 -0700 Subject: [PATCH 072/352] add initial mi_commit_mask abstraction --- include/mimalloc-internal.h | 76 +++++++++++++++++++++++++++++++++++++ include/mimalloc-types.h | 10 +++-- src/os.c | 1 + src/segment.c | 61 +++++++++++++---------------- 4 files changed, 109 insertions(+), 39 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index a113b121..bcced4cb 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -650,6 +650,82 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } + +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +static inline mi_commit_mask_t mi_commit_mask_empty(void) { + return 0; +} + +static inline mi_commit_mask_t mi_commit_mask_full(void) { + return ~mi_commit_mask_empty(); +} + +static inline mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { + mi_assert_internal(bitidx < MI_INTPTR_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_INTPTR_BITS); + if (bitcount == MI_INTPTR_BITS) { + mi_assert_internal(bitidx==0); + return mi_commit_mask_full(); + } + else if (bitcount == 0) { + return mi_commit_mask_empty(); + } + else { + return (((uintptr_t)1 << bitcount) - 1) << bitidx; + } +} + +static inline bool mi_commit_mask_is_empty(mi_commit_mask_t mask) { + return (mask == 0); +} + +static inline bool mi_commit_mask_is_full(mi_commit_mask_t mask) { + return (~mask == 0); +} + +static inline bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return ((commit & mask) == mask); +} + +static inline bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return ((commit & mask) != 0); +} + +static mi_decl_nodiscard inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return (commit & mask); +} + +static inline void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { + *commit = *commit & ~mask; +} + +static inline void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { + *commit = *commit | mask; +} + +#define mi_commit_mask_foreach(mask,idx,count) \ + idx = 0; \ + while (mask != 0) { \ + /* count ones */ \ + count = 0; \ + while ((mask&1)==1) { \ + mask >>= 1; \ + count++; \ + } \ + /* if found, do action */ \ + if (count > 0) { + +#define mi_commit_mask_foreach_end() \ + } \ + idx += count; \ + /* shift out the zero */ \ + mask >>= 1; \ + idx++; \ + } + // ------------------------------------------------------------------- // Fast "random" shuffle // 
------------------------------------------------------------------- diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 7270f798..06ab1ebe 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -276,9 +276,11 @@ typedef enum mi_segment_kind_e { #error "not enough commit bits to cover the segment size" #endif -typedef mi_page_t mi_slice_t; +typedef mi_page_t mi_slice_t; -typedef int64_t mi_msecs_t; +typedef int64_t mi_msecs_t; + +typedef uintptr_t mi_commit_mask_t; // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that @@ -290,8 +292,8 @@ typedef struct mi_segment_s { bool allow_decommit; mi_msecs_t decommit_expire; - uintptr_t decommit_mask; - uintptr_t commit_mask; + mi_commit_mask_t decommit_mask; + mi_commit_mask_t commit_mask; _Atomic(struct mi_segment_s*) abandoned_next; diff --git a/src/os.c b/src/os.c index cddc5b51..5c38989d 100644 --- a/src/os.c +++ b/src/os.c @@ -624,6 +624,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld) { + UNUSED(tld); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); diff --git a/src/segment.c b/src/segment.c index ac6d1a79..34fdf0bd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -268,8 +268,8 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, - (~segment->commit_mask == 0 && segment->decommit_mask == 0), segment->mem_is_fixed, tld->os); + bool fully_committed = (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, fully_committed, segment->mem_is_fixed, tld->os); } @@ -339,7 +339,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Span management ----------------------------------------------------------- */ -static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size) { +static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size) { mi_assert_internal(_mi_ptr_segment(p) == segment); if (size == 0 || size > MI_SEGMENT_SIZE) return 0; if (p >= (uint8_t*)segment + mi_segment_size(segment)) return 0; @@ -370,39 +370,38 @@ static uintptr_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative } mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); - uintptr_t mask = (((uintptr_t)1 << bitcount) - 1) << bitidx; - return mask; + return mi_commit_mask_create(bitidx, bitcount); } static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { // commit liberal, but decommit conservative uint8_t* start; size_t full_size; - uintptr_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); - if (mask==0 || full_size==0) return true; + mi_commit_mask_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); + if (mi_commit_mask_is_empty(mask) || full_size==0) return true; - if (commit && (segment->commit_mask & 
mask) != mask) { + if (commit && !mi_commit_mask_all_set(segment->commit_mask, mask)) { bool is_zero = false; if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - segment->commit_mask |= mask; + mi_commit_mask_set(&segment->commit_mask,mask); } - else if (!commit && (segment->commit_mask & mask) != 0) { + else if (!commit && mi_commit_mask_any_set(segment->commit_mask,mask)) { mi_assert_internal((void*)start != (void*)segment); _mi_os_decommit(start, full_size, stats); // ok if this fails - segment->commit_mask &= ~mask; + mi_commit_mask_clear(&segment->commit_mask, mask); } // increase expiration of reusing part of the delayed decommit - if (commit && (segment->decommit_mask & mask) != 0) { + if (commit && mi_commit_mask_any_set(segment->decommit_mask, mask)) { segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } // always undo delayed decommits - segment->decommit_mask &= ~mask; + mi_commit_mask_clear(&segment->decommit_mask, mask); mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (~segment->commit_mask == 0 && segment->decommit_mask==0) return true; // fully committed + if (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)) return true; // fully committed return mi_segment_commitx(segment,true,p,size,stats); } @@ -416,44 +415,36 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ // register for future decommit in the decommit mask uint8_t* start; size_t full_size; - uintptr_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); - if (mask==0 || full_size==0) return; + mi_commit_mask_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); + if (mi_commit_mask_is_empty(mask) || full_size==0) return; // update delayed commit - segment->decommit_mask |= (mask & segment->commit_mask); // only decommit what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->decommit_mask, mi_commit_mask_intersect(mask,segment->commit_mask)); // only decommit what is committed; span_free may try to decommit more segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } } static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (segment->decommit_mask == 0) return; + if (mi_commit_mask_is_empty(segment->decommit_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->decommit_expire) return; - uintptr_t mask = segment->decommit_mask; + mi_commit_mask_t mask = segment->decommit_mask; segment->decommit_expire = 0; - segment->decommit_mask = 0; + segment->decommit_mask = mi_commit_mask_empty(); - uintptr_t idx = 0; - while (mask != 0) { - // count ones - size_t count = 0; - while ((mask&1)==1) { - mask >>= 1; - count++; - } + uintptr_t idx; + uintptr_t count; + mi_commit_mask_foreach(mask, idx, count) { // if found, decommit that sequence if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); size_t size = count * MI_COMMIT_SIZE; mi_segment_commitx(segment, false, p, size, stats); - idx += count; } - // shift out the 0 - mask >>= 1; - idx++; } - mi_assert_internal(segment->decommit_mask == 0); + mi_commit_mask_foreach_end() + mi_assert_internal(mi_commit_mask_is_empty(segment->decommit_mask)); } @@ -693,10 
+684,10 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } if (!commit_info_still_good) { - segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed + segment->commit_mask = (!commit ? 0x01 : mi_commit_mask_full()); // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_fixed); segment->decommit_expire = 0; - segment->decommit_mask = 0; + segment->decommit_mask = mi_commit_mask_empty(); } // initialize segment info From f7dc4847f26533ba6bf1a59b6955db4b497a35d7 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 21:58:32 -0700 Subject: [PATCH 073/352] keep commit_mask live in the cache for better reuse --- include/mimalloc-internal.h | 39 +++++++++---- src/arena.c | 109 ++++++++++++++++++++++++++---------- src/options.c | 4 +- src/segment.c | 29 ++++++---- 4 files changed, 126 insertions(+), 55 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bcced4cb..8004ba84 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -71,9 +71,9 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) size_t _mi_os_good_alloc_size(size_t size); // arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld); // "segment.c" @@ -655,6 +655,8 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // commit mask // ------------------------------------------------------------------- +#define MI_COMMIT_MASK_BITS (sizeof(mi_commit_mask_t)*8) + static inline mi_commit_mask_t mi_commit_mask_empty(void) { return 0; } @@ -664,9 +666,9 @@ static inline mi_commit_mask_t mi_commit_mask_full(void) { } static inline mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { - mi_assert_internal(bitidx < MI_INTPTR_BITS); - mi_assert_internal((bitidx + bitcount) <= MI_INTPTR_BITS); - if (bitcount == MI_INTPTR_BITS) { + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + if (bitcount == MI_COMMIT_MASK_BITS) { mi_assert_internal(bitidx==0); return mi_commit_mask_full(); } @@ -683,7 +685,7 @@ static inline bool mi_commit_mask_is_empty(mi_commit_mask_t mask) { } static inline bool mi_commit_mask_is_full(mi_commit_mask_t mask) { - return (~mask == 0); + return ((~mask) == 0); } static inline bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { @@ -694,18 +696,35 @@ static inline bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mas return ((commit & mask) != 0); } -static mi_decl_nodiscard inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) 
{ +mi_decl_nodiscard static inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { return (commit & mask); } static inline void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = *commit & ~mask; + *commit = (*commit) & (~mask); } static inline void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = *commit | mask; + *commit = (*commit) | mask; } +static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total) { + if (mi_commit_mask_is_full(mask)) { + return total; + } + else if (mi_commit_mask_is_empty(mask)) { + return 0; + } + else { + size_t count = 0; + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + return (total/MI_COMMIT_MASK_BITS)*count; + } +} + + #define mi_commit_mask_foreach(mask,idx,count) \ idx = 0; \ while (mask != 0) { \ diff --git a/src/arena.c b/src/arena.c index f946bfa9..e7ea7bb1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -135,8 +135,8 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* typedef struct mi_cache_slot_s { void* p; size_t memid; + mi_commit_mask_t commit_mask; _Atomic(mi_msecs_t) expire; - bool is_committed; // TODO: use bit from p to reduce size? } mi_cache_slot_t; static mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 @@ -147,7 +147,10 @@ static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BIT static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + UNUSED(tld); + UNUSED(commit); + // only segment blocks if (size != MI_SEGMENT_SIZE || alignment > MI_SEGMENT_ALIGN) return NULL; @@ -177,24 +180,55 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool* co void* p = slot->p; *memid = slot->memid; *is_zero = false; - bool committed = slot->is_committed; + mi_commit_mask_t cmask = slot->commit_mask; // copy slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - if (*commit && !committed) { + // ignore commit request + /* + if (commit && !mi_commit_mask_is_full(cmask)) { bool commit_zero; - _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); - *commit = true; + bool ok = _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); // todo: only commit needed parts? + if (!ok) { + *commit_mask = cmask; + } + else { + *commit_mask = mi_commit_mask_full(); + } } else { - *commit = committed; - } - + */ + *commit_mask = cmask; + // mark the slot as free again mi_assert_internal(mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); return p; } +static void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { + if (mi_commit_mask_is_empty(*cmask)) { + // nothing + } + else if (mi_commit_mask_is_full(*cmask)) { + _mi_os_decommit(p, total, stats); + } + else { + // todo: one call to decommit the whole at once? 
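  // (illustrative note, not part of the original hunk) each bit in the commit mask
  // covers total/MI_COMMIT_MASK_BITS bytes of the block, so the partial case below
  // walks every run of set bits with mi_commit_mask_foreach and decommits each run
  // as one contiguous range: start = p + idx*part, size = count*part.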
+ mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t part = total/MI_COMMIT_MASK_BITS; + uintptr_t idx; + uintptr_t count; + mi_commit_mask_t mask = *cmask; + mi_commit_mask_foreach(mask, idx, count) { + void* start = (uint8_t*)p + (idx*part); + size_t size = count*part; + _mi_os_decommit(start, size, stats); + } + mi_commit_mask_foreach_end() + } + *cmask = mi_commit_mask_empty(); +} + static void mi_cache_purge(mi_os_tld_t* tld) { UNUSED(tld); mi_msecs_t now = _mi_clock_now(); @@ -214,10 +248,11 @@ static void mi_cache_purge(mi_os_tld_t* tld) { if (expire != 0 && now >= expire) { // safe read // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(slot->is_committed && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); - slot->is_committed = false; + // decommit committed parts + mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); + //_mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); } mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } @@ -226,7 +261,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { } } -static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) +static bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -255,13 +290,12 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit slot->p = start; slot->memid = memid; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->is_committed = is_committed; - if (is_committed && !is_large) { + slot->commit_mask = commit_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large) { long delay = mi_option_get(mi_option_arena_reset_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit - _mi_os_decommit(start, size, tld->stats); - slot->is_committed = false; + mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); } else { mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); @@ -311,10 +345,10 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool* commit, bool* large, bool* is_zero, + bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(commit_mask != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; @@ -339,9 +373,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } } } // try from another numa node instead.. @@ -351,43 +389,52 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } } } } // try to get from the cache - void* p = mi_cache_pop(numa_node, size, alignment, commit, large, is_zero, memid, tld); + void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); if (p != NULL) return p; // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); + p = _mi_os_alloc_aligned(size, alignment, commit, large, tld); + *commit_mask = ((p!=NULL && commit) ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, commit_mask, large, is_zero, memid, tld); } /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool is_large, mi_os_tld_t* tld) { +void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) { - _mi_abandoned_await_readers(); // wait unti safe to free - _mi_os_free_ex(p, size, is_committed, tld->stats); + if (!mi_cache_push(p, size, memid, commit_mask, is_large, tld)) { + _mi_abandoned_await_readers(); // wait until safe to free + // TODO: is it safe on all platforms to free even it contains decommitted parts? (eg. 
macOS) + const size_t csize = mi_commit_mask_committed_size(commit_mask, size); + _mi_stat_decrease(&_mi_stats_main.committed, csize); + _mi_os_free_ex(p, size, false /*pretend decommitted to not double count stats*/, tld->stats); } } else { diff --git a/src/options.c b/src/options.c index 5fa9e2e7..fe94a1fb 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory @@ -82,7 +82,7 @@ static mi_option_desc_t options[_mi_option_last] = #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #else - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index 34fdf0bd..7d2e12f4 100644 --- a/src/segment.c +++ b/src/segment.c @@ -137,7 +137,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -268,8 +268,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - bool fully_committed = (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, fully_committed, segment->mem_is_fixed, tld->os); + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->commit_mask, segment->mem_is_fixed, tld->os); } @@ -382,11 +381,15 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (commit && !mi_commit_mask_all_set(segment->commit_mask, mask)) { bool is_zero = false; - if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; + mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); + _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(cmask, 
MI_SEGMENT_SIZE)); // adjust for overlap + if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; mi_commit_mask_set(&segment->commit_mask,mask); } else if (!commit && mi_commit_mask_any_set(segment->commit_mask,mask)) { mi_assert_internal((void*)start != (void*)segment); + mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap _mi_os_decommit(start, full_size, stats); // ok if this fails mi_commit_mask_clear(&segment->commit_mask, mask); } @@ -401,6 +404,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); if (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)) return true; // fully committed return mi_segment_commitx(segment,true,p,size,stats); } @@ -648,29 +652,30 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (required > 0); + const bool commit = eager || (required > 0); // Try to get from our cache first bool is_zero = false; const bool commit_info_still_good = (segment != NULL); + mi_commit_mask_t commit_mask = (segment != NULL ? segment->commit_mask : mi_commit_mask_empty()); if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy size_t memid = 0; - // segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld); - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); - + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &commit_mask, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - if (!commit) { + + if (!mi_commit_mask_all_set(commit_mask,mi_commit_mask_create(0, 1))) { // at least commit the info slices mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); bool ok = _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); - if (!ok) return NULL; // failed to commit + if (!ok) return NULL; // failed to commit + mi_commit_mask_set(&commit_mask,mi_commit_mask_create(0, 1)); } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = mi_commit_mask_is_full(commit_mask); mi_segments_track_size((long)(segment_size), tld); mi_segment_map_allocated_at(segment); } @@ -684,7 +689,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } if (!commit_info_still_good) { - segment->commit_mask = (!commit ? 
0x01 : mi_commit_mask_full()); // on lazy commit, the initial part is always committed + segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_fixed); segment->decommit_expire = 0; segment->decommit_mask = mi_commit_mask_empty(); From f9ca7cd05a8fa83e69198a92a12baf9f37257712 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Sep 2020 22:16:58 -0700 Subject: [PATCH 074/352] use proper file descriptor in mmap for decommit --- src/os.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/os.c b/src/os.c index 5c38989d..4696a4d7 100644 --- a/src/os.c +++ b/src/os.c @@ -345,6 +345,17 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr return p; } +static int mi_unix_mmap_fd(void) { +#if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) os_tag = 100; + return VM_MAKE_TAG(os_tag); +#else + return -1; +#endif +} + static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; #if !defined(MAP_ANONYMOUS) @@ -354,7 +365,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #define MAP_NORESERVE 0 #endif int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - int fd = -1; + const int fd = mi_unix_mmap_fd(); #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); @@ -365,13 +376,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #endif #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) os_tag = 100; - fd = VM_MAKE_TAG(os_tag); - #endif + #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); @@ -713,7 +718,8 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + const int fd = mi_unix_mmap_fd(); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); if (p != start) { err = errno; } } else { From a372847ccfbcc37afdcd64fb3f81f7d3d698d1cf Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 08:57:56 -0700 Subject: [PATCH 075/352] verbose ctest on Linux pipeline --- azure-pipelines.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 85529e2a..a922e569 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -100,7 +100,8 @@ jobs: cmakeArgs: .. 
$(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: make test -C $(BuildType) + - script: ctest --verbose --timeout 120 + workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-ubuntu-$(BuildType) From e703bfc3192569dcf9984e536027f60ca887100b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 09:02:15 -0700 Subject: [PATCH 076/352] build windows pipeline in parallel --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a922e569..dd544d18 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -37,6 +37,7 @@ jobs: inputs: solution: $(BuildType)/libmimalloc.sln configuration: '$(MSBuildConfiguration)' + msbuildArguments: -m - script: | cd $(BuildType) ctest --verbose --timeout 120 From e2ae9f3125b9e45025858f700e967b22a8033bce Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 09:14:32 -0700 Subject: [PATCH 077/352] fix pipeline script for macOS --- azure-pipelines.yml | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a25fa3a7..4ce98c50 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -129,20 +129,16 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) displayName: Make -<<<<<<< HEAD - #- script: make test -C $(BuildType) - # displayName: CTest - - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api - workingDirectory: $(BuildType) - displayName: TestAPI - - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress - workingDirectory: $(BuildType) - displayName: TestStress -======= + # - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api + # workingDirectory: $(BuildType) + # displayName: TestAPI + # - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress + # workingDirectory: $(BuildType) + # displayName: TestStress - script: ctest --verbose --timeout 120 workingDirectory: $(BuildType) displayName: CTest ->>>>>>> dev + # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) From 5ae01fe4d92de2b94bee45f7b1d3f8df322bab40 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 09:39:16 -0700 Subject: [PATCH 078/352] experiment with commit strategy on macOS --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 4696a4d7..6e8f6149 100644 --- a/src/os.c +++ b/src/os.c @@ -715,7 +715,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ } #elif defined(__wasi__) // WebAssembly guests can't control memory protection - #elif defined(MAP_FIXED) + #elif defined(MAP_FIXED) && !defined(__APPLE__) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) const int fd = mi_unix_mmap_fd(); From 828613a6942e6891bff514d42198d890114c3296 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 12:06:56 -0700 Subject: [PATCH 079/352] use MADV_DONTNEED for commit/decommit on macOS --- src/os.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/os.c b/src/os.c index 6e8f6149..13785b65 100644 --- a/src/os.c +++ b/src/os.c @@ -716,20 +716,35 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #elif defined(__wasi__) // WebAssembly guests can't control memory protection #elif defined(MAP_FIXED) && !defined(__APPLE__) + // Linux if (!commit) { - // use mmap with MAP_FIXED to 
discard the existing memory (and reduce commit charge) + // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) const int fd = mi_unix_mmap_fd(); void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); if (p != start) { err = errno; } } else { - // for commit, just change the protection + // commit: just change the protection err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } } #else - err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); - if (err != 0) { err = errno; } + // MacOS and others. + if (!commit) { + #if defined(MADV_DONTNEED) + // decommit: use MADV_DONTNEED as it decrease rss immediately (unlike MADV_FREE) + err = madvise(start, csize, MADV_DONTNEED); + #else + // decommit: just disable access + err = mprotect(start, csize, PROT_NONE); + if (err != 0) { err = errno; } + #endif + } + else { + // commit: ensure we can access the area + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } #endif if (err != 0) { _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); From 8e0d846b40fa44225227e01531c6b45a1f6f79a8 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 12:19:05 -0700 Subject: [PATCH 080/352] consistent commit order --- src/os.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/os.c b/src/os.c index 13785b65..7cede199 100644 --- a/src/os.c +++ b/src/os.c @@ -704,8 +704,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #if defined(_WIN32) if (commit) { - // if the memory was already committed, the call succeeds but it is not zero'd - // *is_zero = true; + // *is_zero = true; // note: if the memory was already committed, the call succeeds but the memory is not zero'd void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); err = (p == start ? 0 : GetLastError()); } @@ -717,22 +716,27 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ // WebAssembly guests can't control memory protection #elif defined(MAP_FIXED) && !defined(__APPLE__) // Linux - if (!commit) { + if (commit) { + // commit: just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) const int fd = mi_unix_mmap_fd(); void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); if (p != start) { err = errno; } } - else { - // commit: just change the protection + #else + // macOSX and others. + if (commit) { + // commit: ensure we can access the area err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } - } - #else - // MacOS and others. 
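/* A minimal, self-contained sketch (not mimalloc's implementation; the helper
   names below are made up for illustration) of the reserve/commit/decommit
   pattern that the hunks above implement on Unix-like systems: reserve address
   space with mmap(PROT_NONE), commit by enabling access with mprotect, and
   decommit either by re-mapping the range with MAP_FIXED (Linux) or with
   madvise(MADV_DONTNEED) (macOS and others), which releases the physical pages
   while keeping the address range reserved. */
#include <sys/mman.h>
#include <stddef.h>
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif

static void* example_reserve(size_t size) {
  // reserve address space only; no physical memory is committed yet
  void* p = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  return (p == MAP_FAILED ? NULL : p);
}

static int example_commit(void* p, size_t size) {
  // commit: make the pages read/write accessible
  return mprotect(p, size, PROT_READ | PROT_WRITE);
}

static int example_decommit(void* p, size_t size) {
#if defined(MADV_DONTNEED)
  // decommit: drop the physical pages but keep the reservation
  return madvise(p, size, MADV_DONTNEED);
#else
  // fallback: at least make the range inaccessible
  return mprotect(p, size, PROT_NONE);
#endif
}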
- if (!commit) { + } + else { #if defined(MADV_DONTNEED) - // decommit: use MADV_DONTNEED as it decrease rss immediately (unlike MADV_FREE) + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = madvise(start, csize, MADV_DONTNEED); #else // decommit: just disable access @@ -740,11 +744,6 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ if (err != 0) { err = errno; } #endif } - else { - // commit: ensure we can access the area - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } #endif if (err != 0) { _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); From 45300ac43d500836fa496167fbf64b84be7b65f3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 13:24:47 -0700 Subject: [PATCH 081/352] merge from dev --- include/mimalloc-atomic.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index b25a0ac3..b6506075 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -217,27 +217,6 @@ static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi InterlockedExchange64(p, x); #endif } -static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)* p, mi_memory_order mo) { - (void)(mo); -#if defined(_M_X64) - return *p; -#else - int64_t old = *p; - int64_t x = old; - while ((old = InterlockedCompareExchange64(p, x, old)) != x) { - x = old; - } - return x; -#endif -} -static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)* p, int64_t x, mi_memory_order mo) { - (void)(mo); -#if defined(x_M_IX86) || defined(_M_X64) - *p = x; -#else - InterlockedExchange64(p,x); -#endif -} // These are used by the statistics static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { From 38261322409d813e8fd7edbc22501bd4790363de Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 14:51:20 -0700 Subject: [PATCH 082/352] use dynamic initial commit --- src/segment.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/segment.c b/src/segment.c index 7d2e12f4..d5c8a6ec 100644 --- a/src/segment.c +++ b/src/segment.c @@ -411,7 +411,6 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_decommit) return; - if (segment->commit_mask == 1) return; // fully decommitted (1 = the initial segment metadata span) if (mi_option_get(mi_option_reset_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } @@ -666,12 +665,14 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (segment == NULL) return NULL; // failed to allocate mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - if (!mi_commit_mask_all_set(commit_mask,mi_commit_mask_create(0, 1))) { + const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_assert_internal(commit_needed>0); + if (!mi_commit_mask_all_set(commit_mask,mi_commit_mask_create(0, commit_needed))) { // at least commit the info slices - mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); - bool ok = _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); + mi_assert_internal(commit_needed*MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); + 
bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, &is_zero, tld->stats); if (!ok) return NULL; // failed to commit - mi_commit_mask_set(&commit_mask,mi_commit_mask_create(0, 1)); + mi_commit_mask_set(&commit_mask,mi_commit_mask_create(0, commit_needed)); } segment->memid = memid; segment->mem_is_fixed = mem_large; From 953bbde089ec4e163dabf508f2f77d34920f600e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Sep 2020 15:09:51 -0700 Subject: [PATCH 083/352] fix is_in_same_page check --- include/mimalloc-internal.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 8004ba84..1575c1fc 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -585,7 +585,11 @@ static inline bool mi_is_in_same_segment(const void* p, const void* q) { static inline bool mi_is_in_same_page(const void* p, const void* q) { mi_segment_t* segment = _mi_ptr_segment(p); if (_mi_ptr_segment(q) != segment) return false; - return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q)); + // assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q)); + mi_page_t* page = _mi_segment_page_of(segment, p); + size_t psize; + uint8_t* start = _mi_segment_page_start(segment, page, &psize); + return (start <= (uint8_t*)q && (uint8_t*)q < start + psize); } static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { From 313008ecaa27b016e6bd78e1a83ebafe5fe381bb Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Sep 2020 15:20:59 -0700 Subject: [PATCH 084/352] ensure page->retire_expire is always 1 --- src/page.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 9d919cfa..d9b5412d 100644 --- a/src/page.c +++ b/src/page.c @@ -389,6 +389,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } +// Retire parameters #define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX #define MI_RETIRE_CYCLES (8) @@ -415,7 +416,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; From 6b013d5f38b9349fd21c35e49c90bef83612060f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Sep 2020 22:55:36 -0700 Subject: [PATCH 085/352] test for arena count early; skip test in bitmap_mask_ for perf --- src/arena.c | 78 ++++++++++++++++++++++-------------------------- src/bitmap.inc.c | 5 ++-- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/src/arena.c b/src/arena.c index 3556f6e4..1ed782ef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -8,23 +8,18 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 8MiB). +large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). 
In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. Currently arenas are only used to for huge OS page (1GiB) reservations, -otherwise it delegates to direct allocation from the OS. +or direct OS memory reservations -- otherwise it delegates to direct allocation from the OS. In the future, we can expose an API to manually add more kinds of arenas which is sometimes needed for embedded devices or shared memory for example. (We can also employ this with WASI or `sbrk` systems to reserve large arenas on demand and be able to reuse them efficiently). -The arena allocation needs to be thread safe and we use an atomic -bitmap to allocate. The current implementation of the bitmap can -only do this within a field (`uintptr_t`) so we can allocate at most -blocks of 2GiB (64*32MiB) and no object can cross the boundary. This -can lead to fragmentation but fortunately most objects will be regions -of 256MiB in practice. +The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -55,7 +50,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // size in count of arena blocks. typedef uintptr_t mi_block_info_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_ALIGN // 8MiB +#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_SIZE // 8MiB #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) @@ -352,43 +347,42 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, const int numa_node = _mi_os_numa_node(tld); // current numa node // try to allocate in an arena if the alignment is small enough - // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && - // size <= MI_ARENA_MAX_OBJ_SIZE && - size >= MI_ARENA_MIN_OBJ_SIZE) - { - const size_t bcount = mi_block_count_of_size(size); - - mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); - // try numa affine allocation - for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? + // and the object is not too large or too small. + if (alignment <= MI_SEGMENT_ALIGN && size >= MI_ARENA_MIN_OBJ_SIZE) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + if (mi_unlikely(max_arena > 0)) { + const size_t bcount = mi_block_count_of_size(size); + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + // try numa affine allocation + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - *commit_mask = (acommit ? 
mi_commit_mask_full() : mi_commit_mask_empty()); - return p; + { + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } } } - } - // try from another numa node instead.. - for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! + // try from another numa node instead.. + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; + { + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } } } } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 07f48277..1ee17556 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -67,8 +67,8 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); mi_assert_internal(count > 0); - if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; - if (count == 0) return 0; + //if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; + //if (count == 0) return 0; return ((((uintptr_t)1 << count) - 1) << bitidx); } @@ -145,6 +145,7 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx { mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); + mi_assert_internal(count > 0); _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_load_relaxed(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut From 97629cefaa53dbd05d142d6d4886d9b3093bd22d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 11:12:23 -0700 Subject: [PATCH 086/352] tune performance options with longer reset delay --- src/options.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/options.c b/src/options.c index fe94a1fb..ecbbf30d 100644 --- a/src/options.c +++ b/src/options.c @@ -81,12 +81,14 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed -#else +#elif defined(_WIN32) { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) +#else + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly 
committed (but per page in the segment on demand) #endif - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed - { 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 500, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output From 161f9a7751c29e6930430fba850f0f7cdda5583d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 11:12:44 -0700 Subject: [PATCH 087/352] refactor arena allocation --- src/arena.c | 115 ++++++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 52 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8af6f9b9..e0524b77 100644 --- a/src/arena.c +++ b/src/arena.c @@ -136,7 +136,7 @@ static mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BIT static mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +static mi_decl_noinline void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { UNUSED(tld); UNUSED(commit); @@ -194,7 +194,7 @@ static void* mi_cache_pop(int numa_node, size_t size, size_t alignment, bool com return p; } -static void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { +static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { if (mi_commit_mask_is_empty(*cmask)) { // nothing } @@ -218,7 +218,7 @@ static void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t tot *cmask = mi_commit_mask_empty(); } -static void mi_cache_purge(mi_os_tld_t* tld) { +static mi_decl_noinline void mi_cache_purge(mi_os_tld_t* tld) { UNUSED(tld); mi_msecs_t now = _mi_clock_now(); size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start @@ -250,7 +250,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) { } } -static bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) +static mi_decl_noinline bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { // only for segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -301,8 +301,8 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask Arena Allocation ----------------------------------------------------------- */ -static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t 
needed_bcount, + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; @@ -333,6 +333,52 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + UNUSED_RELEASE(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + const size_t bcount = mi_block_count_of_size(size); + if (mi_likely(max_arena == 0)) return NULL; + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + + // try numa affine allocation + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } + } + } + + // try from another numa node instead.. + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + bool acommit = commit; + void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); + return p; + } + } + } + return NULL; +} + + void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) @@ -343,60 +389,25 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, *is_zero = false; bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` + if (large==NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node - // try to allocate in an arena if the alignment is small enough - // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && size >= MI_ARENA_MIN_OBJ_SIZE) { - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - if (mi_unlikely(max_arena > 0)) { - const size_t bcount = mi_block_count_of_size(size); - mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); - // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; - } - } - } - // try from another numa node instead.. - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; - } - } - } - } + // try to get from the cache + if (size == MI_SEGMENT_SIZE && alignment <= MI_SEGMENT_ALIGN) { + void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); + if (p != NULL) return p; } - // try to get from the cache - void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); - if (p != NULL) return p; - + // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) { + void* p = mi_arena_allocate(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); + if (p != NULL) return p; + } // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - p = _mi_os_alloc_aligned(size, alignment, commit, large, tld->stats); + void* p = _mi_os_alloc_aligned(size, alignment, commit, large, tld->stats); *commit_mask = ((p!=NULL && commit) ? 
mi_commit_mask_full() : mi_commit_mask_empty()); return p; } From 037285ac0980b648d5c4ad6b359ac57d5f21e543 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 13:27:34 -0700 Subject: [PATCH 088/352] refactor segment cache and map in a separate source file --- CMakeLists.txt | 1 + ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 5 +- ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 + include/mimalloc-internal.h | 29 +- include/mimalloc-types.h | 1 + src/arena.c | 229 +------------- src/bitmap.c | 2 +- src/segment-cache.c | 310 +++++++++++++++++++ src/segment.c | 152 +-------- src/static.c | 1 + 16 files changed, 378 insertions(+), 365 deletions(-) create mode 100644 src/segment-cache.c diff --git a/CMakeLists.txt b/CMakeLists.txt index acd16a98..a9fdb259 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ set(mi_sources src/os.c src/bitmap.c src/arena.c + src/segment-cache.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index ab02a658..a87b69ac 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -246,6 +246,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 2544c06b..d01f9311 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -79,5 +79,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index f212b619..41fb77c1 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -242,6 +242,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 128126c1..05417645 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -62,6 +62,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 000958ee..4136e574 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -246,6 +246,7 @@ + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index 38e83578..d6b7b5a9 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + @@ -67,7 +70,7 @@ Header Files - Source Files + Header Files diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index b1184cb5..98dee520 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -234,6 +234,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 8d071d29..92be7cb4 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -49,6 +49,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 7ddfa38f..11733c66 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -62,19 +62,24 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, 
mi_stats_t* stats); // to free thread local data -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -size_t _mi_os_good_alloc_size(size_t size); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +size_t _mi_os_good_alloc_size(size_t size); // arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); +// "segment-cache.c" +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld); +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -463,6 +468,10 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } +// size of a segment +static inline size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; +} // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 06ab1ebe..8524de8a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -114,6 +114,7 @@ terms of the MIT license. A copy of the license can be found in the file // Derived constants #define MI_SEGMENT_SIZE (1ULL< MI_SEGMENT_ALIGN) return NULL; - - // numa node determines start field - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - if (*large) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_zero = false; - mi_commit_mask_t cmask = slot->commit_mask; // copy - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - // ignore commit request - /* - if (commit && !mi_commit_mask_is_full(cmask)) { - bool commit_zero; - bool ok = _mi_os_commit(p, MI_SEGMENT_SIZE, &commit_zero, tld->stats); // todo: only commit needed parts? - if (!ok) { - *commit_mask = cmask; - } - else { - *commit_mask = mi_commit_mask_full(); - } - } - else { - */ - *commit_mask = cmask; - - // mark the slot as free again - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { - if (mi_commit_mask_is_empty(*cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(*cmask)) { - _mi_os_decommit(p, total, stats); - } - else { - // todo: one call to decommit the whole at once? - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - uintptr_t idx; - uintptr_t count; - mi_commit_mask_t mask = *cmask; - mi_commit_mask_foreach(mask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - *cmask = mi_commit_mask_empty(); -} - -static mi_decl_noinline void mi_cache_purge(mi_os_tld_t* tld) { - UNUSED(tld); - mi_msecs_t now = _mi_clock_now(); - size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start - size_t purged = 0; - for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && now >= expire) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && now >= expire) { // safe read - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - //_mi_os_decommit(slot->p, MI_SEGMENT_SIZE, tld->stats); - } - _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (purged > 4) break; // bound to no more than 4 purge tries per push - } - } -} - -static mi_decl_noinline bool mi_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* 
tld) -{ - // only for segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // purge expired entries - mi_cache_purge(tld); - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = commit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large) { - long delay = mi_option_get(mi_option_arena_reset_delay); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - } - else { - mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); - return true; -} - - /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ @@ -333,7 +147,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren return p; } -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); @@ -349,11 +163,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) { - *commit_mask = (acommit ? mi_commit_mask_full() : mi_commit_mask_empty()); return p; } } @@ -366,11 +178,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - bool acommit = commit; - void* p = mi_arena_alloc_from(arena, i, bcount, &acommit, large, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) { - *commit_mask = (acommit ? 
mi_commit_mask_full() : mi_commit_mask_empty()); return p; } } @@ -379,11 +189,10 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(commit_mask != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; @@ -392,49 +201,35 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (large==NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node - // try to get from the cache - if (size == MI_SEGMENT_SIZE && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_cache_pop(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); - if (p != NULL) return p; - } - // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, commit_mask, large, is_zero, memid, tld); + void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_zero, memid, tld); if (p != NULL) return p; } // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned(size, alignment, commit, large, tld->stats); - *commit_mask = ((p!=NULL && commit) ? mi_commit_mask_full() : mi_commit_mask_empty()); - return p; + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); } -void* _mi_arena_alloc(size_t size, bool commit, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, commit_mask, large, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); } /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) { +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - if (!mi_cache_push(p, size, memid, commit_mask, is_large, tld)) { - _mi_abandoned_await_readers(); // wait until safe to free - // TODO: is it safe on all platforms to free even it contains decommitted parts? (eg. 
macOS) - const size_t csize = mi_commit_mask_committed_size(commit_mask, size); - _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_os_free_ex(p, size, false /*pretend decommitted to not double count stats*/, tld->stats); - } + _mi_os_free_ex(p, size, is_committed, tld->stats); } else { // allocated in an arena diff --git a/src/bitmap.c b/src/bitmap.c index 93487a32..ad5a9552 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -40,7 +40,7 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); diff --git a/src/segment-cache.c b/src/segment-cache.c new file mode 100644 index 00000000..569e878a --- /dev/null +++ b/src/segment-cache.c @@ -0,0 +1,310 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2020, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + Implements a cache of segments to avoid expensive OS calls + and also the full memory map of all segments. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include "bitmap.h" // atomic bitmap + +#define MI_CACHE_FIELDS (16) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit + +#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) +#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) + +typedef struct mi_cache_slot_s { + void* p; + size_t memid; + mi_commit_mask_t commit_mask; + _Atomic(mi_msecs_t) expire; +} mi_cache_slot_t; + +static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! +static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; +static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free + + +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + // only segment blocks + if (size != MI_SEGMENT_SIZE) return NULL; + + // numa node determines start field + const int numa_node = _mi_os_numa_node(tld); + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // find an available slot + mi_bitmap_index_t bitidx = 0; + bool claimed = false; + if (*large) { // large allowed? 
+ claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; + } + if (!claimed) { + claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_zero = false; + mi_commit_mask_t cmask = slot->commit_mask; // copy + slot->p = NULL; + mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); + *commit_mask = cmask; + + // mark the slot as free again + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; +} + +static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) +{ + if (mi_commit_mask_is_empty(*cmask)) { + // nothing + } + else if (mi_commit_mask_is_full(*cmask)) { + _mi_os_decommit(p, total, stats); + } + else { + // todo: one call to decommit the whole at once? + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t part = total/MI_COMMIT_MASK_BITS; + uintptr_t idx; + uintptr_t count; + mi_commit_mask_t mask = *cmask; + mi_commit_mask_foreach(mask, idx, count) { + void* start = (uint8_t*)p + (idx*part); + size_t size = count*part; + _mi_os_decommit(start, size, stats); + } + mi_commit_mask_foreach_end() + } + *cmask = mi_commit_mask_empty(); +} + +#define MI_MAX_PURGE_PER_PUSH (4) + +static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) +{ + UNUSED(tld); + mi_msecs_t now = _mi_clock_now(); + size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start + size_t purged = 0; + for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); + if (expire != 0 && now >= expire) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + expire = mi_atomic_loadi64_acquire(&slot->expire); + if (expire != 0 && now >= expire) { // safe read + // still expired, decommit it + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); + mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + _mi_abandoned_await_readers(); // wait until safe to decommit + // decommit committed parts + // TODO: instead of decommit, we could also free to the OS? 
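+          // note: decommit only the parts recorded in the slot's commit mask; mi_commit_mask_decommit also clears the mask afterwards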
+ mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); + } + _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop + } + if (purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + } + } +} + +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld) +{ + // only for normal segment blocks + if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + + // numa node determines start field + int numa_node = _mi_os_numa_node(NULL); + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_segment_cache_purge(tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; + slot->memid = memid; + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); + slot->commit_mask = commit_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large) { + long delay = mi_option_get(mi_option_arena_reset_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); + } + else { + mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); + } + } + + // make it available + _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; +} + + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) + set to 1 if it contains the segment meta data. +----------------------------------------------------------- */ + + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB +#else +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? 
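+  // note: map the segment address to a word index into mi_segment_map and a bit index within that word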
+ uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; + *bitidx = segindex % (8*MI_INTPTR_SIZE); + return (segindex / (8*MI_INTPTR_SIZE)); +} + +void _mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + if (index==0) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +void _mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + if (index == 0) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask & ~((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. +static mi_segment_t* _mi_segment_of(const void* p) { + mi_segment_t* segment = _mi_ptr_segment(p); + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { + return segment; // yes, allocated by us + } + if (index==0) return NULL; + // search downwards for the first segment in case it is an interior pointer + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // valid huge objects + // note: we could maintain a lowest index to speed up the path for invalid pointers? + size_t lobitidx; + size_t loindex; + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); + if (lobits != 0) { + loindex = index; + lobitidx = mi_bsr(lobits); // lobits != 0 + } + else { + uintptr_t lomask = mask; + loindex = index - 1; + while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; + if (loindex==0) return NULL; + lobitidx = mi_bsr(lomask); // lomask != 0 + } + // take difference as the addresses could be larger than the MAX_ADDRESS space. + size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; + segment = (mi_segment_t*)((uint8_t*)segment - diff); + + if (segment == NULL) return NULL; + mi_assert_internal((void*)segment < p); + bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(cookie_ok); + if (mi_unlikely(!cookie_ok)) return NULL; + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); + return segment; +} + +// Is this a valid pointer in our heap? 
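+// (i.e. does p point into a segment that was registered in mi_segment_map?)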
+static bool mi_is_valid_pointer(const void* p) { + return (_mi_segment_of(p) != NULL); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* +// Return the full segment range belonging to a pointer +static void* mi_segment_range_of(const void* p, size_t* size) { + mi_segment_t* segment = _mi_segment_of(p); + if (segment == NULL) { + if (size != NULL) *size = 0; + return NULL; + } + else { + if (size != NULL) *size = segment->segment_size; + return segment; + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); + return page; +} +*/ diff --git a/src/segment.c b/src/segment.c index b010fcbb..a1a38a64 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,8 +13,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_map_allocated_at(const mi_segment_t* segment); -static void mi_segment_map_freed_at(const mi_segment_t* segment); static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); /* -------------------------------------------------------------------------------- @@ -183,11 +181,6 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { Segment size calculations ----------------------------------------------------------- */ - -static size_t mi_segment_size(mi_segment_t* segment) { - return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; -} - static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } @@ -249,7 +242,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; - mi_segment_map_freed_at(segment); + _mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (MI_SECURE>0) { // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set @@ -264,7 +257,13 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // mi_segment_delayed_decommit(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->commit_mask, segment->mem_is_fixed, tld->os); + const size_t size = mi_segment_size(segment); + if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_fixed, tld->os)) { + const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); + if (csize > 0 && !segment->mem_is_fixed) _mi_stat_decrease(&_mi_stats_main.committed, csize); + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_fixed /* pretend not committed to not double count decommits */, tld->os); + } } @@ -647,7 +646,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay 
= (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - const bool commit = eager || (required > 0); + bool commit = eager || (required > 0); // Try to get from our cache first bool is_zero = false; @@ -657,8 +656,12 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy size_t memid = 0; - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &commit_mask, &mem_large, &is_zero, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &mem_large, &is_zero, &memid, os_tld); + if (segment==NULL) { + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); + if (segment == NULL) return NULL; // failed to allocate + commit_mask = (commit ? mi_commit_mask_full() : mi_commit_mask_empty()); + } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); @@ -674,7 +677,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->mem_is_fixed = mem_large; segment->mem_is_committed = mi_commit_mask_is_full(commit_mask); mi_segments_track_size((long)(segment_size), tld); - mi_segment_map_allocated_at(segment); + _mi_segment_map_allocated_at(segment); } // zero the segment info? -- not always needed as it is zero initialized from the OS @@ -1368,126 +1371,3 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment } -/* ----------------------------------------------------------- - The following functions are to reliably find the segment or - block that encompasses any pointer p (or NULL if it is not - in any of our segments). - We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) - set to 1 if it contains the segment meta data. ------------------------------------------------------------ */ - - -#if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB -#else -#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb -#endif - -#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) -#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) -#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) - -static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments - -static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? 
- uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; - *bitidx = segindex % (8*MI_INTPTR_SIZE); - return (segindex / (8*MI_INTPTR_SIZE)); -} - -static void mi_segment_map_allocated_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index==0) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask | ((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -static void mi_segment_map_freed_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index == 0) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask & ~((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. -static mi_segment_t* _mi_segment_of(const void* p) { - mi_segment_t* segment = _mi_ptr_segment(p); - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge - const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { - return segment; // yes, allocated by us - } - if (index==0) return NULL; - // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough - // valid huge objects - // note: we could maintain a lowest index to speed up the path for invalid pointers? - size_t lobitidx; - size_t loindex; - uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); - if (lobits != 0) { - loindex = index; - lobitidx = mi_bsr(lobits); // lobits != 0 - } - else { - uintptr_t lomask = mask; - loindex = index - 1; - while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; - if (loindex==0) return NULL; - lobitidx = mi_bsr(lomask); // lomask != 0 - } - // take difference as the addresses could be larger than the MAX_ADDRESS space. - size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; - segment = (mi_segment_t*)((uint8_t*)segment - diff); - - if (segment == NULL) return NULL; - mi_assert_internal((void*)segment < p); - bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(cookie_ok); - if (mi_unlikely(!cookie_ok)) return NULL; - if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range - mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); - return segment; -} - -// Is this a valid pointer in our heap? 
-static bool mi_is_valid_pointer(const void* p) { - return (_mi_segment_of(p) != NULL); -} - -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return mi_is_valid_pointer(p); -} - -/* -// Return the full segment range belonging to a pointer -static void* mi_segment_range_of(const void* p, size_t* size) { - mi_segment_t* segment = _mi_segment_of(p); - if (segment == NULL) { - if (size != NULL) *size = 0; - return NULL; - } - else { - if (size != NULL) *size = segment->segment_size; - return segment; - } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); - mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); - mi_reset_delayed(tld); - mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); - return page; -} -*/ diff --git a/src/static.c b/src/static.c index e53aff1d..79c0a033 100644 --- a/src/static.c +++ b/src/static.c @@ -25,6 +25,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "os.c" #include "bitmap.c" #include "arena.c" +#include "segment-cache.c" #include "segment.c" #include "page.c" #include "heap.c" From d87933a3b5f0a8b1d49b4f6ab284e061931957e6 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 8 Sep 2020 15:50:37 -0700 Subject: [PATCH 089/352] update comments --- src/segment-cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 569e878a..e7369bb3 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -6,8 +6,9 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- - Implements a cache of segments to avoid expensive OS calls - and also the full memory map of all segments. + Implements a cache of segments to avoid expensive OS calls and to reuse + the commit_mask to optimize the commit/decommit calls. + The full memory map of all segments is also implemented here. 
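+  (the segment map is what mi_is_in_heap_region uses to check whether an arbitrary pointer was allocated by mimalloc)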
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" From 1d946146cc37595f277c7d781804fbc3cc49b9b7 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 11 Sep 2020 10:40:22 -0700 Subject: [PATCH 090/352] fix all_committed --- src/arena.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index 97c287d2..1f970753 100644 --- a/src/arena.c +++ b/src/arena.c @@ -228,14 +228,14 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, b Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld) { +void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld->stats != NULL); if (p==NULL) return; if (size==0) return; if (memid == MI_MEMID_OS) { // was a direct OS allocation, pass through - _mi_os_free_ex(p, size, is_committed, tld->stats); + _mi_os_free_ex(p, size, all_committed, tld->stats); } else { // allocated in an arena @@ -258,7 +258,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os } // potentially decommit if (arena->is_committed) { - mi_assert_internal(all_committed); + mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) } else { mi_assert_internal(arena->blocks_committed != NULL); From 01307a25ffaf6991023f770a24bce72552dfc852 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 11 Sep 2020 11:00:19 -0700 Subject: [PATCH 091/352] fix assertion --- src/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitmap.c b/src/bitmap.c index 08289264..ea55d33e 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -178,7 +178,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_load_relaxed(field); const size_t initial = mi_clz(map); // count of initial zeros starting at idx - mi_assert_internal(initial >= 0 && initial <= MI_BITMAP_FIELD_BITS); + mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries From b1cc3d550c212444cba6df8d0f12bd7cd75dd487 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 14 Sep 2020 10:42:47 -0700 Subject: [PATCH 092/352] fix valid pointer detection on mac --- src/segment-cache.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index e16c9e4a..f1ed5c06 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -74,7 +74,7 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); *commit_mask = cmask; - + // mark the slot as free again mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); @@ -85,7 +85,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo { if (mi_commit_mask_is_empty(*cmask)) { // nothing - } + } else if (mi_commit_mask_is_full(*cmask)) { 
_mi_os_decommit(p, total, stats); } @@ -108,7 +108,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo #define MI_MAX_PURGE_PER_PUSH (4) -static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) +static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) { UNUSED(tld); mi_msecs_t now = _mi_clock_now(); @@ -145,7 +145,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me { // only for normal segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); size_t start_field = 0; @@ -187,7 +187,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); } } - + // make it available _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); return true; @@ -217,9 +217,15 @@ static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB w static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? - uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE; - *bitidx = segindex % (8*MI_INTPTR_SIZE); - return (segindex / (8*MI_INTPTR_SIZE)); + if ((uintptr_t)segment >= MI_MAX_ADDRESS) { + *bitidx = 0; + return 0; + } + else { + uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + *bitidx = segindex % MI_INTPTR_BITS; + return (segindex / MI_INTPTR_BITS); + } } void _mi_segment_map_allocated_at(const mi_segment_t* segment) { @@ -257,8 +263,11 @@ static mi_segment_t* _mi_segment_of(const void* p) { return segment; // yes, allocated by us } if (index==0) return NULL; + + // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? + // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough // valid huge objects // note: we could maintain a lowest index to speed up the path for invalid pointers? size_t lobitidx; From fbaa70e1eb5f7c797c4298f4d7a44f5e71ae06b2 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 14 Sep 2020 11:01:17 -0700 Subject: [PATCH 093/352] increase default test load to 25% to increase azure pipeline test load --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 271bea85..46e1eac4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -26,7 +26,7 @@ terms of the MIT license. 
// // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 10; // scaling factor +static int SCALE = 25; // scaling factor static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors From b149099bf328ef6ddb11da02bc64f67da8ff8694 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 24 Sep 2020 16:55:00 -0700 Subject: [PATCH 094/352] use relaxed load for last search position in an arena --- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1f970753..55700dc2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -103,9 +103,9 @@ static size_t mi_block_count_of_size(size_t size) { ----------------------------------------------------------- */ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { - size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search + size_t idx = mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time + mi_atomic_store_relaxed(&arena->search_idx, idx); // start search from here next time return true; }; return false; From e1c38eef76cb8bb3e49a32073c5c579afaae48fa Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 24 Sep 2020 17:20:39 -0700 Subject: [PATCH 095/352] use allow_decommit option for both the segment cache and pages --- include/mimalloc.h | 2 +- src/options.c | 4 ++-- src/segment-cache.c | 4 ++-- src/segment.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7b6d5a0b..90e7675d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -314,7 +314,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_allow_decommit, mi_option_reset_delay, - mi_option_arena_reset_delay, + mi_option_segment_decommit_delay, mi_option_use_numa_nodes, mi_option_limit_os_alloc, mi_option_os_tag, diff --git a/src/options.c b/src/options.c index f81bb8af..9705a4cc 100644 --- a/src/options.c +++ b/src/options.c @@ -88,8 +88,8 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 1000, UNINIT, MI_OPTION(arena_reset_delay) }, // reset delay in milli-seconds for freed segments + { 500, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 1000, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment-cache.c b/src/segment-cache.c index f1ed5c06..d1f49ab6 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -177,8 +177,8 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->commit_mask = commit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned) { - long delay = mi_option_get(mi_option_arena_reset_delay); + if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { + long delay = mi_option_get(mi_option_segment_decommit_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); diff --git a/src/segment.c b/src/segment.c index 8624f7e4..e64f8409 100644 --- a/src/segment.c +++ b/src/segment.c @@ -384,7 +384,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s mi_assert_internal((void*)start != (void*)segment); mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap - _mi_os_decommit(start, full_size, stats); // ok if this fails + if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); } // ok if this fails mi_commit_mask_clear(&segment->commit_mask, mask); } // increase expiration of reusing part of the delayed decommit @@ -422,7 +422,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (mi_commit_mask_is_empty(segment->decommit_mask)) return; + if (!segment->allow_decommit || mi_commit_mask_is_empty(segment->decommit_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->decommit_expire) return; From ad058291953dbe00d7a7d7f786d17f7f4c563033 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 6 Nov 2020 17:49:10 -0800 Subject: [PATCH 096/352] remove shadow warning when building in static mode --- src/segment.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/segment.c b/src/segment.c index 22aa48c6..5bbf283c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -246,10 +246,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { if (MI_SECURE>0) { // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted - size_t os_page_size = _mi_os_page_size(); - _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_page_size, os_page_size); - uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_page_size; - _mi_os_unprotect(end, os_page_size); + size_t os_pagesize = _mi_os_page_size(); + _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; + _mi_os_unprotect(end, os_pagesize); } // purge delayed decommits now? 
(no, leave it to the cache) @@ -712,12 +712,12 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data - size_t os_page_size = _mi_os_page_size(); - mi_assert_internal(mi_segment_info_size(segment) - os_page_size >= pre_size); - _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_page_size, os_page_size); - uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_page_size; - mi_segment_ensure_committed(segment, end, os_page_size, tld->stats); - _mi_os_protect(end, os_page_size); + size_t os_pagesize = _mi_os_page_size(); + mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size); + _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; + mi_segment_ensure_committed(segment, end, os_pagesize, tld->stats); + _mi_os_protect(end, os_pagesize); if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( guard_slices = 1; } From 217871cb459e4915fefcef2a45f63caa48a60fb0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 22 Jan 2021 11:24:25 -0800 Subject: [PATCH 097/352] fix search_idx start in managed arenas --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 55700dc2..0cd8aba3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -105,7 +105,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* { size_t idx = mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, idx); // start search from here next time + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; From e314699ee0de0c4a9b227970e8c2956f50a49af5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 28 Jan 2021 17:32:42 -0800 Subject: [PATCH 098/352] add debug view of arenas --- include/mimalloc.h | 1 + src/arena.c | 31 +++++++++++++++++++++++++++++-- src/segment-cache.c | 12 ++++++++++++ test/test-stress.c | 10 +++++++--- 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 904e10d4..4ecae58d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -267,6 +267,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; +mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/arena.c b/src/arena.c index 0cd8aba3..ea3a1abd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -49,7 +49,7 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. 
typedef uintptr_t mi_block_info_t; -#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_SIZE // 8MiB +#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 8MiB (must be at least MI_SEGMENT_ALIGN) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) @@ -103,7 +103,7 @@ static size_t mi_block_count_of_size(size_t size) { ----------------------------------------------------------- */ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { - size_t idx = mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter + size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; @@ -346,6 +346,33 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe return 0; } +static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) { + size_t inuse_count = 0; + for (size_t i = 0; i < field_count; i++) { + char buf[MI_BITMAP_FIELD_BITS + 1]; + uintptr_t field = mi_atomic_load_relaxed(&fields[i]); + for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) { + bool inuse = ((((uintptr_t)1 << bit) & field) != 0); + if (inuse) inuse_count++; + buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 'x' : '.'); + } + buf[MI_BITMAP_FIELD_BITS] = 0; + _mi_verbose_message("%s%s\n", prefix, buf); + } + return inuse_count; +} + +void mi_debug_show_arenas(void) mi_attr_noexcept { + size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arenas; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; + size_t inuse_count = 0; + _mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count); + inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count); + _mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count); + } +} /* ----------------------------------------------------------- Reserve a huge page arena. diff --git a/src/segment-cache.c b/src/segment-cache.c index d1f49ab6..08517f4b 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -16,6 +16,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "bitmap.h" // atomic bitmap +//#define MI_CACHE_DISABLE 1 + #define MI_CACHE_FIELDS (16) #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit @@ -39,6 +41,10 @@ static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +#ifdef MI_CACHE_DISABLE + return NULL; +#else + // only segment blocks if (size != MI_SEGMENT_SIZE) return NULL; @@ -79,6 +85,7 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); return p; +#endif } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) @@ -143,6 +150,10 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { +#ifdef MI_CACHE_DISABLE + return false; +#else + // only for normal segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; @@ -191,6 +202,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me // make it available _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); return true; +#endif } diff --git a/test/test-stress.c b/test/test-stress.c index c69c08cc..fde0c791 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -181,14 +181,15 @@ static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); static void test_stress(void) { uintptr_t r = rand(); for (int n = 0; n < ITER; n++) { - run_os_threads(THREADS, &stress); + run_os_threads(THREADS, &stress); for (int i = 0; i < TRANSFERS; i++) { if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers void* p = atomic_exchange_ptr(&transfer[i], NULL); free_items(p); } } - // mi_collect(false); + //mi_collect(false); + //mi_debug_show_arenas(); #if !defined(NDEBUG) || defined(MI_TSAN) if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif @@ -243,7 +244,9 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. 
srand(0x7feb352d); - + + //mi_reserve_os_memory(512ULL << 20, true, true); + #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) mi_stats_reset(); #endif @@ -256,6 +259,7 @@ int main(int argc, char** argv) { #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) mi_collect(true); + //mi_debug_show_arenas(); #endif #ifndef USE_STD_MALLOC mi_stats_print(NULL); From 3bade4b1bd96ea4d815f2bb40082ff38164354e5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 29 Jan 2021 15:42:52 -0800 Subject: [PATCH 099/352] fix accounting of abandoned pages --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index 5bbf283c..d2902e69 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1104,6 +1104,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s if (mi_page_all_free(page)) { // if this page is all free now, free it without adding to any queues (yet) mi_assert_internal(page->next == NULL && page->prev==NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); segment->abandoned--; slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce! mi_assert_internal(!mi_slice_is_used(slice)); From bd56782f26cc54720484df3d86b36872c169dfba Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 31 Jan 2021 14:02:06 -0800 Subject: [PATCH 100/352] bump version to 2.0.0 --- cmake/mimalloc-config-version.cmake | 4 ++-- include/mimalloc.h | 2 +- test/CMakeLists.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index ed95c19e..58b23676 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ -set(mi_version_major 1) -set(mi_version_minor 7) +set(mi_version_major 2) +set(mi_version_minor 0) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 8d1108a6..48594de7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 170 // major + 2 digits minor +#define MI_MALLOC_VERSION 200 // major + 2 digits minor diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7392d20e..8d137e75 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.7 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 2.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 47050371a1eb935a1571c1e17c3b142402ccc24e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 22 Feb 2021 15:05:47 -0800 Subject: [PATCH 101/352] fix issue #363 and disable assertion for now --- src/init.c | 5 ++++- src/segment.c | 3 +-- test/main-override.cpp | 27 +++++++++++++++++++++------ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/init.c b/src/init.c index aee08f5a..ecb73d6c 100644 --- a/src/init.c +++ b/src/init.c @@ -265,7 +265,10 @@ static bool _mi_heap_done(mi_heap_t* heap) { // free if not the main thread if (heap != &_mi_heap_main) { - mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); + // the following assertion does not always hold for huge segments as those are always treated + // as abandoned: one may allocate it in one thread, but deallocate in another in which case + // the count can be too large or negative. todo: perhaps not count huge segments? see issue #363 + // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main); } #if 0 diff --git a/src/segment.c b/src/segment.c index d2902e69..acb7c58d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1311,7 +1311,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); if (segment == NULL || page==NULL) return NULL; mi_assert_internal(segment->used==1); - mi_assert_internal(mi_page_block_size(page) >= size); + mi_assert_internal(mi_page_block_size(page) >= size); segment->thread_id = 0; // huge segments are immediately abandoned return page; } @@ -1334,7 +1334,6 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block page->is_zero = false; mi_assert(page->used == 0); mi_tld_t* tld = heap->tld; - // mi_segments_track_size((long)segment->segment_size, tld); _mi_segment_page_free(page, true, &tld->segments); } #if (MI_DEBUG!=0) diff --git a/test/main-override.cpp b/test/main-override.cpp index fe5403d1..4acdb34e 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -32,14 +32,17 @@ void heap_late_free(); // issue #204 void padding_shrink(); // issue #209 void various_tests(); void test_mt_shutdown(); +void large_alloc(void); // issue #363 int main() { mi_stats_reset(); // ignore earlier allocations - heap_thread_free_large(); - heap_no_delete(); - heap_late_free(); - padding_shrink(); - various_tests(); + large_alloc(); + + //heap_thread_free_large(); + //heap_no_delete(); + //heap_late_free(); + //padding_shrink(); + //various_tests(); //test_mt_shutdown(); mi_stats_print(NULL); return 0; @@ -176,7 +179,7 @@ void heap_thread_free_large_worker() { void heap_thread_free_large() { for (int i = 0; i < 
100; i++) { - shared_p = mi_malloc_aligned(2*1024*1024 + 1, 8); + shared_p = mi_malloc_aligned(2 * 1024 * 1024 + 1, 8); auto t1 = std::thread(heap_thread_free_large_worker); t1.join(); } @@ -207,3 +210,15 @@ void test_mt_shutdown() std::cout << "done" << std::endl; } + +// issue #363 +using namespace std; + +void large_alloc(void) +{ + char* a = new char[1ull << 25]; + thread th([&] { + delete[] a; + }); + th.join(); +} \ No newline at end of file From dc6bce256d4f3ce87761f9337977dff3d8b1776c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 6 Apr 2021 10:58:12 -0700 Subject: [PATCH 102/352] bump version to v2.0.1 --- include/mimalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 48594de7..2cdded9f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 200 // major + 2 digits minor +#define MI_MALLOC_VERSION 201 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 34ba03951e0756ad1c95c2dd01e967ed8a3f5745 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 6 Apr 2021 11:01:06 -0700 Subject: [PATCH 103/352] merge from dev --- include/mimalloc.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 988537c5..2cdded9f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,11 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -<<<<<<< HEAD #define MI_MALLOC_VERSION 201 // major + 2 digits minor -======= -#define MI_MALLOC_VERSION 171 // major + 2 digits minor ->>>>>>> dev // ------------------------------------------------------ // Compiler specific attributes From 06110589740eb58532d387501e9fdfda146e92e3 Mon Sep 17 00:00:00 2001 From: diorszeng <54629524+diorszeng@users.noreply.github.com> Date: Mon, 31 May 2021 15:02:17 +0800 Subject: [PATCH 104/352] Update mimalloc-types.h fix typo --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 750a2b28..325f487a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -117,7 +117,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE #define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1) #define MI_SEGMENT_SLICE_SIZE (1ULL<< MI_SEGMENT_SLICE_SHIFT) -#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024 +#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 128 #define MI_SMALL_PAGE_SIZE (1ULL< Date: Sun, 6 Jun 2021 20:33:55 -0700 Subject: [PATCH 105/352] merge from dev --- src/segment-cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 08517f4b..f655a92a 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -10,9 +10,9 @@ terms of the MIT license. A copy of the license can be found in the file the commit_mask to optimize the commit/decommit calls. The full memory map of all segments is also implemented here. 
-----------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "../include/mimalloc.h" +#include "../include/mimalloc-internal.h" +#include "../include/mimalloc-atomic.h" #include "bitmap.h" // atomic bitmap From e592360d4d9d91f8d5765ea6b2dd757a36a2fcea Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 7 Jun 2021 17:53:03 -0700 Subject: [PATCH 106/352] revert relative includes --- src/segment-cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index f655a92a..08517f4b 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -10,9 +10,9 @@ terms of the MIT license. A copy of the license can be found in the file the commit_mask to optimize the commit/decommit calls. The full memory map of all segments is also implemented here. -----------------------------------------------------------------------------*/ -#include "../include/mimalloc.h" -#include "../include/mimalloc-internal.h" -#include "../include/mimalloc-atomic.h" +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" #include "bitmap.h" // atomic bitmap From 262022c1d1104874f304889a6ded878cd3d32cc6 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 1 Oct 2021 15:10:11 -0700 Subject: [PATCH 107/352] fix segment map for 32-bit systems (including wasm) --- src/segment-cache.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 08517f4b..00f904ab 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -225,13 +225,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) -static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments +static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? if ((uintptr_t)segment >= MI_MAX_ADDRESS) { *bitidx = 0; - return 0; + return MI_SEGMENT_MAP_WSIZE; } else { uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; @@ -244,7 +244,7 @@ void _mi_segment_map_allocated_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index==0) return; + if (index==MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { @@ -256,7 +256,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); - if (index == 0) return; + if (index == MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { @@ -274,7 +274,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { return segment; // yes, allocated by us } - if (index==0) return NULL; + if (index==MI_SEGMENT_MAP_WSIZE) return NULL; // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? 
From e6b58052dae764bf5f1b79ffb65fbbc19596934a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 2 Oct 2021 11:13:00 -0700 Subject: [PATCH 108/352] add start offset to pages to reduce cache/page effects --- include/mimalloc-types.h | 4 ++++ src/alloc-aligned.c | 2 +- src/page.c | 4 ++-- src/segment.c | 14 ++++++++------ 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b3f247b0..2118dfbe 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -145,6 +145,10 @@ terms of the MIT license. A copy of the license can be found in the file // Used as a special value to encode block sizes in 32 bits. #define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) +// blocks up to this size are always allocated aligned +#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) + + // The free lists use encoded next fields // (Only actually encodes when MI_ENCODED_FREELIST is defined.) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 724c0a1b..2280783f 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -41,7 +41,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t } // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) { + if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; diff --git a/src/page.c b/src/page.c index b732c078..82d5dd65 100644 --- a/src/page.c +++ b/src/page.c @@ -593,7 +593,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // ensure we don't touch memory beyond the page to reduce page commit. // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%. extend = (max_extend==0 ? 1 : max_extend); - } + } mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); mi_assert_internal(extend < (1UL<<16)); @@ -624,9 +624,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields mi_page_set_heap(page, heap); + page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start size_t page_size; _mi_segment_page_start(segment, page, &page_size); - page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(mi_page_block_size(page) <= page_size); mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); diff --git a/src/segment.c b/src/segment.c index 76ce2e0b..c6036c4a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -184,20 +184,22 @@ static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } -static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t* page_size) +static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) { ptrdiff_t idx = slice - segment->slices; size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; - if (page_size != NULL) *page_size = psize; - return (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); + // make the start not OS page aligned for smaller blocks to avoid page/cache effects + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0); + if (page_size != NULL) *page_size = psize - start_offset; + return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } // Start of the page available memory; can be used on uninitialized pages uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); - uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page_size); - mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); + uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page->xblock_size, page_size); + mi_assert_internal(page->xblock_size > 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -556,7 +558,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); // commit before changing the slice data - if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) { + if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, 0, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) { return NULL; // commit failed! } From f945dbb390685b7b3c9bfd836ff3358c5c91ed41 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 19 Oct 2021 10:18:44 -0700 Subject: [PATCH 109/352] add space after _Atomic to prevent errors on msvc without /TP (see PR #452) --- src/segment-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 00f904ab..aab387f0 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -225,7 +225,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) -static _Atomic(uintptr_t)mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments +static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? 
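Note on patch 108 above: it offsets the usable start of small-object pages by MI_MAX_ALIGN_GUARANTEE so that page data does not always begin exactly on an OS page boundary, reducing page and cache-set conflicts between pages; correspondingly, alloc-aligned.c now uses MI_MAX_ALIGN_GUARANTEE rather than MI_MEDIUM_OBJ_SIZE_MAX as the cutoff below which a regular allocation already satisfies the alignment request. A minimal sketch of the start computation follows; the slice size, bounds, and names are illustrative assumptions, not the exact mimalloc code.

#include <stdint.h>
#include <stddef.h>

#define SLICE_SIZE            (64 * 1024)   // assumed slice size (64KiB)
#define MAX_ALIGN_GUARANTEE   (8 * 16)      // assumed: 8 * MI_MAX_ALIGN_SIZE = 128 bytes

// Start and usable size of a page that begins at slice `idx` and spans
// `slice_count` slices. Small block sizes get a small start offset so the
// page data does not always begin on an OS page boundary.
static uint8_t* page_area_start(uint8_t* segment, size_t idx, size_t slice_count,
                                size_t block_size, size_t* page_size) {
  size_t psize = slice_count * SLICE_SIZE;
  size_t start_offset = 0;
  if (block_size >= sizeof(void*) && block_size <= 1024) {
    start_offset = MAX_ALIGN_GUARANTEE;     // offset small-block pages
  }
  if (page_size != NULL) { *page_size = psize - start_offset; }
  return segment + (idx * SLICE_SIZE) + start_offset;
}

Passing a block size of 0, as mi_segment_span_allocate does in the patch when committing a span, keeps the offset at zero, so commit ranges still start on slice boundaries.
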
From 2583ab73dcd4e88447784d7d33092a09f7ee426d Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 19 Oct 2021 13:57:36 -0700 Subject: [PATCH 110/352] remove region.c which belongs in dev only --- src/region.c | 505 --------------------------------------------------- 1 file changed, 505 deletions(-) delete mode 100644 src/region.c diff --git a/src/region.c b/src/region.c deleted file mode 100644 index 2f68b140..00000000 --- a/src/region.c +++ /dev/null @@ -1,505 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper - way to reuse memory. -3. This layer allows for NUMA aware allocation. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? 
------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset - -#include "bitmap.h" - -// Internal raw OS interface -size_t _mi_os_large_page_size(void); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); - -// arena.c -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); - - - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 64KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE - -#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) -#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) - -// Region info -typedef union mi_region_info_u { - uintptr_t value; - struct { - bool valid; // initialized? - bool is_large:1; // allocated in fixed large/huge OS pages - bool is_pinned:1; // pinned memory cannot be decommitted - short numa_node; // the associated NUMA node (where -1 means no associated node) - } x; -} mi_region_info_t; - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - _Atomic(uintptr_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena - uintptr_t padding; // round to 8 fields -} mem_region_t; - -// The region map -static mem_region_t regions[MI_REGION_MAX]; - -// Allocated regions -static _Atomic(uintptr_t) regions_count; // = 0; - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - return _mi_divide_up(size, MI_SEGMENT_SIZE); -} - -/* -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. 
-static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} -*/ - -// Return if a pointer points into a region reserved by us. -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); - mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); -} - -static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { - mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); - size_t idx = region - regions; - mi_assert_internal(®ions[idx] == region); - return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; -} - -static size_t mi_memid_create_from_arena(size_t arena_memid) { - return (arena_memid << 1) | 1; -} - - -static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if ((id&1)==1) { - if (arena_memid != NULL) *arena_memid = (id>>1); - return true; - } - else { - size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; - *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; - *region = ®ions[idx]; - return false; - } -} - - -/* ---------------------------------------------------------------------------- - Allocate a region is allocated from the OS (or an arena) ------------------------------------------------------------------------------*/ - -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // not out of regions yet? - if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - - // try to allocate a fresh region from the OS - bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; - bool is_pinned = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_pinned, &is_zero, &arena_memid, tld); - if (start == NULL) return false; - mi_assert_internal(!(region_large && !allow_large)); - mi_assert_internal(!region_large || region_commit); - - // claim a fresh slot - const uintptr_t idx = mi_atomic_increment_acq_rel(®ions_count); - if (idx >= MI_REGION_MAX) { - mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); - _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB)); - return false; - } - - // allocated, initialize and claim the initial blocks - mem_region_t* r = ®ions[idx]; - r->arena_memid = arena_memid; - mi_atomic_store_release(&r->in_use, (uintptr_t)0); - mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_store_release(&r->commit, (region_commit ? 
MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_store_release(&r->reset, (uintptr_t)0); - *bit_idx = 0; - _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_store_ptr_release(void,&r->start, start); - - // and share it - mi_region_info_t info; - info.value = 0; // initialize the full union to zero - info.x.valid = true; - info.x.is_large = region_large; - info.x.is_pinned = is_pinned; - info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_store_release(&r->info, info.value); // now make it available to others - *region = r; - return true; -} - -/* ---------------------------------------------------------------------------- - Try to claim blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { - // initialized at all? - mi_region_info_t info; - info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); - if (info.value==0) return false; - - // numa correct - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.x.numa_node; - if (rnode >= 0 && rnode != numa_node) return false; - } - - // check allow-large - if (!allow_large && info.x.is_large) return false; - - return true; -} - - -static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // try all regions for a free slot - const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - mem_region_t* r = ®ions[idx]; - // if this region suits our demand (numa node matches, large OS page matches) - if (mi_region_is_suitable(r, numa_node, allow_large)) { - // then try to atomically claim a segment(s) in this region - if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { - tld->region_idx = idx; // remember the last found position - *region = r; - return true; - } - } - } - return false; -} - - -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); - mem_region_t* region; - mi_bitmap_index_t bit_idx; - const int numa_node = (_mi_os_numa_node_count() <= 1 ? 
-1 : _mi_os_numa_node(tld)); - // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) { - // out of regions or memory - return NULL; - } - } - - // ------------------------------------------------ - // found a region and claimed `blocks` at `bit_idx`, initialize them now - mi_assert_internal(region != NULL); - mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); - mi_assert_internal(!(info.x.is_large && !*large)); - mi_assert_internal(start != NULL); - - *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *large = info.x.is_large; - *is_pinned = info.x.is_pinned; - *memid = mi_memid_create(region, bit_idx); - void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - - // commit - if (*commit) { - // ensure commit - bool any_uncommitted; - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); - if (any_uncommitted) { - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - bool commit_zero = false; - if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { - // failed to commit! unclaim and return - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - return NULL; - } - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); - } - mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); - - // unreset reset blocks - if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { - // some blocks are still reset - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; - } - } - mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - - #if (MI_DEBUG>=2) - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - - // and return the allocation - mi_assert_internal(p != NULL); - return p; -} - - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. 
-// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = 0; - *is_zero = false; - *is_pinned = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - if (size == 0) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // allocate from regions if possible - void* p = NULL; - size_t arena_memid; - const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); - if (p == NULL) { - _mi_warning_message("unable to allocate from region: size %zu\n", size); - } - } - if (p == NULL) { - // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, &arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - } - - if (p != NULL) { - mi_assert_internal((uintptr_t)p % alignment == 0); -#if (MI_DEBUG>=2) - if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed -#endif - } - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { - mi_assert_internal(size > 0 && tld != NULL); - if (p==NULL) return; - if (size==0) return; - size = _mi_align_up(size, _mi_os_page_size()); - - size_t arena_memid = 0; - mi_bitmap_index_t bit_idx; - mem_region_t* region; - if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { - // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, full_commit, tld->stats); - } - else { - // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - mi_assert_internal(info.value != 0); - void* blocks_start = mi_region_blocks_start(region, bit_idx); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - - // committed? - if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); - } - - if (any_reset) { - // set the is_reset bits if any pages were reset - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); - } - - // reset the blocks to reduce the working set. 
- if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) - && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead - { - bool any_unreset; - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); - if (any_unreset) { - _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) - _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); - } - } - - // and unclaim - bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_os_tld_t* tld) { - // free every region that has no segments in use. - uintptr_t rcount = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < rcount; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_load_relaxed(®ion->info) != 0) { - // if no segments used, try to claim the whole region - uintptr_t m = mi_atomic_load_relaxed(®ion->in_use); - while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; - if (m == 0) { - // on success, free the whole region - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); - size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); - uintptr_t commit = mi_atomic_load_relaxed(®ions[i].commit); - memset(®ions[i], 0, sizeof(mem_region_t)); - // and release the whole region - mi_atomic_store_release(®ion->info, (uintptr_t)0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); - } - } - } - } -} - - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_reset(p, size, tld->stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_unreset(p, size, is_zero, tld->stats); -} - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_commit(p, size, is_zero, tld->stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_decommit(p, size, tld->stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} From 54b65a556cf1225a5995865d1077340e192112e7 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 27 Oct 2021 10:15:12 -0700 Subject: [PATCH 111/352] fix mi_cfree assertion failure for NULL pointer, issue #478 --- src/segment-cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment-cache.c b/src/segment-cache.c index aab387f0..7bb58ccf 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -267,6 +267,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) { // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
static mi_segment_t* _mi_segment_of(const void* p) { mi_segment_t* segment = _mi_ptr_segment(p); + if (segment == NULL) return NULL; size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge From 1568dbb9e43f3a4f88c596bf5a52b523e8cf6053 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 27 Oct 2021 10:35:16 -0700 Subject: [PATCH 112/352] fix mi_is_valid_pointer bit index search (related to issue #478) --- src/segment-cache.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 7bb58ccf..4adf2123 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -234,9 +234,11 @@ static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitid return MI_SEGMENT_MAP_WSIZE; } else { - uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; *bitidx = segindex % MI_INTPTR_BITS; - return (segindex / MI_INTPTR_BITS); + const size_t mapindex = segindex / MI_INTPTR_BITS; + mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); + return mapindex; } } @@ -290,13 +292,21 @@ static mi_segment_t* _mi_segment_of(const void* p) { loindex = index; lobitidx = mi_bsr(lobits); // lobits != 0 } + else if (index == 0) { + return NULL; + } else { + mi_assert_internal(index > 0); uintptr_t lomask = mask; - loindex = index - 1; - while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--; - if (loindex==0) return NULL; + loindex = index; + do { + loindex--; + lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); + } while (lomask != 0 && loindex > 0); + if (lomask == 0) return NULL; lobitidx = mi_bsr(lomask); // lomask != 0 } + mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); // take difference as the addresses could be larger than the MAX_ADDRESS space. 
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; segment = (mi_segment_t*)((uint8_t*)segment - diff); From e4776337797489e19278c2bfc4e75c783ba5bff8 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 27 Oct 2021 10:41:14 -0700 Subject: [PATCH 113/352] fix assertion --- src/segment-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 4adf2123..9e409be4 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -245,7 +245,7 @@ static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitid void _mi_segment_map_allocated_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); if (index==MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; From 7756e1b5fea6501a63e7a1b082e1532c3b1d7240 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 27 Oct 2021 10:45:19 -0700 Subject: [PATCH 114/352] fix assertion --- src/segment-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 9e409be4..d7604502 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -257,7 +257,7 @@ void _mi_segment_map_allocated_at(const mi_segment_t* segment) { void _mi_segment_map_freed_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); if (index == MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; From 49c75a31574812a06a6384826421e9603f30032d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 9 Nov 2021 20:19:31 -0800 Subject: [PATCH 115/352] wip: increase commit mask resolution --- include/mimalloc-internal.h | 184 ++++++++++++++++++++++++++---------- include/mimalloc-types.h | 31 ++++-- src/options.c | 4 +- src/segment-cache.c | 24 +++-- src/segment.c | 135 +++++++++++++++----------- test/test-stress.c | 4 +- 6 files changed, 255 insertions(+), 127 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 8b642c31..7e400217 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -86,7 +86,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, // "segment-cache.c" void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); @@ -691,77 +691,163 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // commit mask // ------------------------------------------------------------------- -#define MI_COMMIT_MASK_BITS (sizeof(mi_commit_mask_t)*8) -static inline mi_commit_mask_t mi_commit_mask_empty(void) { - return 0; +static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { + 
memset(cm, 0, sizeof(*cm)); } -static inline mi_commit_mask_t mi_commit_mask_full(void) { - return ~mi_commit_mask_empty(); +static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { + memset(cm, 0xFF, sizeof(*cm)); } -static inline mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { +static inline void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); if (bitcount == MI_COMMIT_MASK_BITS) { mi_assert_internal(bitidx==0); - return mi_commit_mask_full(); + mi_commit_mask_create_full(cm); } else if (bitcount == 0) { - return mi_commit_mask_empty(); + mi_commit_mask_create_empty(cm); } else { - return (((uintptr_t)1 << bitcount) - 1) << bitidx; + mi_commit_mask_create_empty(cm); + ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; + ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; + while (bitcount > 0) { + mi_assert_internal(i < MI_COMMIT_MASK_N); + ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; + ptrdiff_t count = (bitcount > avail ? avail : bitcount); + size_t mask = (((size_t)1 << count) - 1) << ofs; + cm->mask[i] = mask; + bitcount -= count; + ofs = 0; + i++; + } } } -static inline bool mi_commit_mask_is_empty(mi_commit_mask_t mask) { - return (mask == 0); -} - -static inline bool mi_commit_mask_is_full(mi_commit_mask_t mask) { - return ((~mask) == 0); -} - -static inline bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return ((commit & mask) == mask); -} - -static inline bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return ((commit & mask) != 0); -} - -mi_decl_nodiscard static inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return (commit & mask); -} - -static inline void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = (*commit) & (~mask); -} - -static inline void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = (*commit) | mask; -} - -static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total) { - if (mi_commit_mask_is_full(mask)) { - return total; +static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + if (cm->mask[i] != 0) return false; } - else if (mi_commit_mask_is_empty(mask)) { + return true; +} + +static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + if (cm->mask[i] != 0) return false; + } + return true; +} + +static inline bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; + } + return true; +} + +static inline bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + if ((commit->mask[i] & cm->mask[i]) != 0) return true; + } + return false; +} + +static inline void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + res->mask[i] = (commit->mask[i] & cm->mask[i]); + } +} + +static inline void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for 
(ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + res->mask[i] &= ~(cm->mask[i]); + } +} + +static inline void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + res->mask[i] |= cm->mask[i]; + } +} + +static inline size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t count = 0; + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + size_t mask = cm->mask[i]; + if (~mask == 0) { + count += MI_COMMIT_MASK_FIELD_BITS; + } + else { + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + } + } + // we use total since for huge segments each commit bit may represent a larger size + return (total / MI_COMMIT_MASK_BITS)* count; +} + + +static inline ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx ) { + ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; + ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; + size_t mask = 0; + // find first ones + while (i < MI_COMMIT_MASK_N) { + mask = cm->mask[i]; + mask >>= ofs; + if (mask != 0) { + while ((mask&1) == 0) { + mask >>= 1; + ofs++; + } + break; + } + i++; + ofs = 0; + } + if (i >= MI_COMMIT_MASK_N) { + // not found + *idx = MI_COMMIT_MASK_BITS; return 0; } else { - size_t count = 0; - for (; mask != 0; mask >>= 1) { // todo: use popcount - if ((mask&1)!=0) count++; - } - return (total/MI_COMMIT_MASK_BITS)*count; + // found, count ones + ptrdiff_t count = 0; + *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; + mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); + do { + do { + count++; + mask >>= 1; + } while (mask != 0); + if ((((count + ofs) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { + i++; + if (i >= MI_COMMIT_MASK_N) break; + mask = cm->mask[i]; + if ((mask&1)==0) break; + ofs = 0; + } + } while (mask != 0); + mi_assert_internal(count > 0); + return count; } } +#define mi_commit_mask_foreach(cm,idx,count) \ + idx = 0; \ + while ((count = mi_commit_mask_next_run(cm,&idx)) > 0) { + +#define mi_commit_mask_foreach_end() \ + idx += count; \ + } + -#define mi_commit_mask_foreach(mask,idx,count) \ +#define xmi_commit_mask_foreach(mask,idx,count) \ idx = 0; \ while (mask != 0) { \ /* count ones */ \ @@ -773,7 +859,7 @@ static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t /* if found, do action */ \ if (count > 0) { -#define mi_commit_mask_foreach_end() \ +#define xmi_commit_mask_foreach_end() \ } \ idx += count; \ /* shift out the zero */ \ diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 82d74f7f..50e24fc9 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -83,9 +83,9 @@ terms of the MIT license. A copy of the license can be found in the file // or otherwise one might define an intptr_t type that is larger than a pointer... // ------------------------------------------------------ -#if INTPTR_MAX == 9223372036854775807LL +#if INTPTR_MAX == INT64_MAX # define MI_INTPTR_SHIFT (3) -#elif INTPTR_MAX == 2147483647LL +#elif INTPTR_MAX == INT32_MAX # define MI_INTPTR_SHIFT (2) #else #error platform must be 32 or 64 bits @@ -94,6 +94,18 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_INTPTR_SIZE (1< MI_LARGE_SIZE_MAX segment with just one huge page inside. 
} mi_segment_kind_t; -#define MI_COMMIT_SIZE (MI_SEGMENT_SIZE/MI_INTPTR_BITS) +#define MI_COMMIT_SIZE (128*1024) +#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) +#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS +#define MI_COMMIT_MASK_N (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) -#if (((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE) > 8*MI_INTPTR_SIZE) -#error "not enough commit bits to cover the segment size" +#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_N * MI_COMMIT_MASK_FIELD_BITS)) +#error "the segment size must be exactly divisible by the (commit size * size_t bits)" #endif -typedef mi_page_t mi_slice_t; +typedef struct mi_commit_mask_s { + size_t mask[MI_COMMIT_MASK_N]; +} mi_commit_mask_t; +typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; -typedef uintptr_t mi_commit_mask_t; // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that diff --git a/src/options.c b/src/options.c index 5f2eedec..5ea7a92a 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) + { 0, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 500, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 10, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) { 1000, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) diff --git a/src/segment-cache.c b/src/segment-cache.c index d7604502..3a318cd4 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -76,11 +76,10 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm *memid = slot->memid; *is_pinned = slot->is_pinned; *is_zero = false; - mi_commit_mask_t cmask = slot->commit_mask; // copy + *commit_mask = slot->commit_mask; slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - *commit_mask = cmask; - + // mark the slot as free again mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); @@ -90,27 +89,26 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { - if (mi_commit_mask_is_empty(*cmask)) { + if (mi_commit_mask_is_empty(cmask)) { // nothing } - else if (mi_commit_mask_is_full(*cmask)) { + else if (mi_commit_mask_is_full(cmask)) { _mi_os_decommit(p, total, stats); } else { // todo: one call to decommit the whole at once? mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t part = total/MI_COMMIT_MASK_BITS; - uintptr_t idx; - uintptr_t count; - mi_commit_mask_t mask = *cmask; - mi_commit_mask_foreach(mask, idx, count) { + ptrdiff_t idx; + ptrdiff_t count; + mi_commit_mask_foreach(cmask, idx, count) { void* start = (uint8_t*)p + (idx*part); size_t size = count*part; _mi_os_decommit(start, size, stats); } mi_commit_mask_foreach_end() } - *cmask = mi_commit_mask_empty(); + mi_commit_mask_create_empty(cmask); } #define MI_MAX_PURGE_PER_PUSH (4) @@ -135,7 +133,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) if (expire != 0 && now >= expire) { // safe read // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit // decommit committed parts // TODO: instead of decommit, we could also free to the OS? 
@@ -148,7 +146,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) } } -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return false; @@ -187,7 +185,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->memid = memid; slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = commit_mask; + slot->commit_mask = *commit_mask; if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { long delay = mi_option_get(mi_option_segment_decommit_delay); if (delay == 0) { diff --git a/src/segment.c b/src/segment.c index 6ae3d9af..ee0a2ae2 100644 --- a/src/segment.c +++ b/src/segment.c @@ -127,7 +127,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -256,8 +256,8 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); + if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { + const size_t csize = mi_commit_mask_committed_size(&segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os); @@ -331,74 +331,85 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { Span management ----------------------------------------------------------- */ -static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size) { - mi_assert_internal(_mi_ptr_segment(p) == segment); - if (size == 0 || size > MI_SEGMENT_SIZE) return 0; - if (p >= (uint8_t*)segment + mi_segment_size(segment)) return 0; +static ptrdiff_t _mi_aligni_up(ptrdiff_t sz, size_t alignment) { + return (ptrdiff_t)_mi_align_up(sz, alignment); +} - uintptr_t diff = (p - (uint8_t*)segment); - uintptr_t start; - uintptr_t end; +static 
ptrdiff_t _mi_aligni_down(ptrdiff_t sz, size_t alignment) { + return (ptrdiff_t)_mi_align_down(sz, alignment); +} + +static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { + mi_assert_internal(_mi_ptr_segment(p) == segment); + mi_commit_mask_create_empty(cm); + if (size == 0 || size > MI_SEGMENT_SIZE) return; + if (p >= (uint8_t*)segment + mi_segment_size(segment)) return; + + ptrdiff_t diff = (p - (uint8_t*)segment); + ptrdiff_t start; + ptrdiff_t end; if (conservative) { - start = _mi_align_up(diff, MI_COMMIT_SIZE); - end = _mi_align_down(diff + size, MI_COMMIT_SIZE); + start = _mi_aligni_up(diff, MI_COMMIT_SIZE); + end = _mi_aligni_down(diff + size, MI_COMMIT_SIZE); } else { - start = _mi_align_down(diff, MI_COMMIT_SIZE); - end = _mi_align_up(diff + size, MI_COMMIT_SIZE); + start = _mi_aligni_down(diff, MI_COMMIT_SIZE); + end = _mi_aligni_up(diff + size, MI_COMMIT_SIZE); } mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); *start_p = (uint8_t*)segment + start; *full_size = (end > start ? end - start : 0); - if (*full_size == 0) return 0; + if (*full_size == 0) return; - uintptr_t bitidx = start / MI_COMMIT_SIZE; - mi_assert_internal(bitidx < (MI_INTPTR_SIZE*8)); + ptrdiff_t bitidx = start / MI_COMMIT_SIZE; + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - uintptr_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 - if (bitidx + bitcount > MI_INTPTR_SIZE*8) { + ptrdiff_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 + if (bitidx + bitcount > MI_COMMIT_MASK_BITS) { _mi_warning_message("commit mask overflow: %zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); } - mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); - - return mi_commit_mask_create(bitidx, bitcount); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + mi_commit_mask_create(bitidx, bitcount, cm); } static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { // commit liberal, but decommit conservative uint8_t* start; size_t full_size; - mi_commit_mask_t mask = mi_segment_commit_mask(segment,!commit/*conservative*/,p,size,&start,&full_size); - if (mi_commit_mask_is_empty(mask) || full_size==0) return true; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; - if (commit && !mi_commit_mask_all_set(segment->commit_mask, mask)) { + if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { bool is_zero = false; - mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); - _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - mi_commit_mask_set(&segment->commit_mask,mask); + mi_commit_mask_set(&segment->commit_mask, &mask); } - else if (!commit && mi_commit_mask_any_set(segment->commit_mask,mask)) { + else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { mi_assert_internal((void*)start != (void*)segment); - mi_commit_mask_t cmask = 
mi_commit_mask_intersect(segment->commit_mask, mask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); } // ok if this fails - mi_commit_mask_clear(&segment->commit_mask, mask); + mi_commit_mask_clear(&segment->commit_mask, &mask); } // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(segment->decommit_mask, mask)) { + if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, mask); - mi_assert_internal((segment->commit_mask & segment->decommit_mask) == segment->decommit_mask); + mi_commit_mask_clear(&segment->decommit_mask, &mask); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); - if (mi_commit_mask_is_full(segment->commit_mask) && mi_commit_mask_is_empty(segment->decommit_mask)) return true; // fully committed + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed return mi_segment_commitx(segment,true,p,size,stats); } @@ -411,27 +422,30 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ // register for future decommit in the decommit mask uint8_t* start; size_t full_size; - mi_commit_mask_t mask = mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size); - if (mi_commit_mask_is_empty(mask) || full_size==0) return; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit - mi_commit_mask_set(&segment->decommit_mask, mi_commit_mask_intersect(mask,segment->commit_mask)); // only decommit what is committed; span_free may try to decommit more + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->decommit_mask, &cmask); segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } } static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (!segment->allow_decommit || mi_commit_mask_is_empty(segment->decommit_mask)) return; + if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->decommit_expire) return; mi_commit_mask_t mask = segment->decommit_mask; segment->decommit_expire = 0; - segment->decommit_mask = mi_commit_mask_empty(); + mi_commit_mask_create_empty(&segment->decommit_mask); - uintptr_t idx; - uintptr_t count; - 
mi_commit_mask_foreach(mask, idx, count) { + ptrdiff_t idx; + ptrdiff_t count; + mi_commit_mask_foreach(&mask, idx, count) { // if found, decommit that sequence if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); @@ -439,8 +453,7 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st mi_segment_commitx(segment, false, p, size, stats); } } - mi_commit_mask_foreach_end() - mi_assert_internal(mi_commit_mask_is_empty(segment->decommit_mask)); + mi_commit_mask_foreach_end() } @@ -649,7 +662,14 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Try to get from our cache first bool is_zero = false; const bool commit_info_still_good = (segment != NULL); - mi_commit_mask_t commit_mask = (segment != NULL ? segment->commit_mask : mi_commit_mask_empty()); + mi_commit_mask_t commit_mask; + if (segment != NULL) { + commit_mask = segment->commit_mask; + } + else { + mi_commit_mask_create_empty(&commit_mask); + } + if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy @@ -659,23 +679,30 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (segment==NULL) { segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate - commit_mask = (commit ? mi_commit_mask_full() : mi_commit_mask_empty()); + if (commit) { + mi_commit_mask_create_full(&commit_mask); + } + else { + mi_commit_mask_create_empty(&commit_mask); + } } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); mi_assert_internal(commit_needed>0); - if (!mi_commit_mask_all_set(commit_mask,mi_commit_mask_create(0, commit_needed))) { + mi_commit_mask_t commit_needed_mask; + mi_commit_mask_create(0, commit_needed, &commit_needed_mask); + if (!mi_commit_mask_all_set(&commit_mask, &commit_needed_mask)) { // at least commit the info slices - mi_assert_internal(commit_needed*MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); + mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= info_slices*MI_SEGMENT_SLICE_SIZE); bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, &is_zero, tld->stats); if (!ok) return NULL; // failed to commit - mi_commit_mask_set(&commit_mask,mi_commit_mask_create(0, commit_needed)); + mi_commit_mask_set(&commit_mask, &commit_needed_mask); } segment->memid = memid; segment->mem_is_pinned = is_pinned; segment->mem_is_large = mem_large; - segment->mem_is_committed = mi_commit_mask_is_full(commit_mask); + segment->mem_is_committed = mi_commit_mask_is_full(&commit_mask); mi_segments_track_size((long)(segment_size), tld); _mi_segment_map_allocated_at(segment); } @@ -692,7 +719,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); segment->decommit_expire = 0; - segment->decommit_mask = mi_commit_mask_empty(); + mi_commit_mask_create_empty( &segment->decommit_mask ); } // initialize segment info diff --git a/test/test-stress.c b/test/test-stress.c index a9568dd9..100c6d66 100644 --- a/test/test-stress.c +++ 
b/test/test-stress.c @@ -39,12 +39,12 @@ static size_t use_one_size = 0; // use single object size of `N * s // #define USE_STD_MALLOC #ifdef USE_STD_MALLOC -#define custom_calloc(n,s) calloc(n,s) +#define custom_calloc(n,s) malloc(n*s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) #else #include -#define custom_calloc(n,s) mi_calloc(n,s) +#define custom_calloc(n,s) mi_malloc(n*s) #define custom_realloc(p,s) mi_realloc(p,s) #define custom_free(p) mi_free(p) #endif From 8cc7d0c0195642f94cd9fc347e621d3652beeb9b Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 10 Nov 2021 16:29:53 -0800 Subject: [PATCH 116/352] increase segment size to 64MiB --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 82d74f7f..8d1e5149 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -106,7 +106,7 @@ terms of the MIT license. A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB -#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 8MiB +#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB @@ -127,7 +127,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 4MiB on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) From 49d64dbc9571516dc8298f6bebc34ebf9d89afc8 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 10 Nov 2021 16:30:21 -0800 Subject: [PATCH 117/352] save decommit_mask for segments in the segment cache --- include/mimalloc-internal.h | 4 +-- src/options.c | 4 +-- src/segment-cache.c | 18 ++++++++---- src/segment.c | 22 +++++++++++--- test/main-override.cpp | 57 +++++++++++++++++++++++++++++-------- 5 files changed, 79 insertions(+), 26 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 8b642c31..7ffa0023 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -85,8 +85,8 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinn void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, mi_commit_mask_t decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); 
void _mi_segment_map_freed_at(const mi_segment_t* segment); diff --git a/src/options.c b/src/options.c index 5f2eedec..dbd4158c 100644 --- a/src/options.c +++ b/src/options.c @@ -89,8 +89,8 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 500, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) - { 1000, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments + { 100, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment-cache.c b/src/segment-cache.c index d7604502..6513204d 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -22,13 +22,14 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit #define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) -#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) +#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes typedef struct mi_cache_slot_s { void* p; size_t memid; bool is_pinned; mi_commit_mask_t commit_mask; + mi_commit_mask_t decommit_mask; _Atomic(mi_msecs_t) expire; } mi_cache_slot_t; @@ -39,8 +40,10 @@ static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIEL static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + if (_mi_preloading()) return NULL; + #ifdef MI_CACHE_DISABLE return NULL; #else @@ -76,11 +79,11 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm *memid = slot->memid; *is_pinned = slot->is_pinned; *is_zero = false; - mi_commit_mask_t cmask = slot->commit_mask; // copy + *commit_mask = slot->commit_mask; + *decommit_mask = slot->decommit_mask; slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - *commit_mask = cmask; - + // mark the slot as free again mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); @@ -140,6 +143,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) // decommit committed parts // TODO: instead of decommit, we could also free to the OS? 
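The purge pass in mi_segment_cache_purge is driven by a per-slot expiration time; the following standalone sketch shows that claim-on-expiry pattern in isolation (slot_t and slot_decommit_due are illustrative names, not the mimalloc API):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t msecs_t;

// Illustrative cache slot: remembers a freed segment and when its delayed
// decommit becomes due (0 means nothing is pending).
typedef struct slot_s {
  void*   p;
  msecs_t expire;
} slot_t;

// Returns true when the slot's delayed decommit is due at time `now`; the
// caller would then decommit the still-committed parts and clear the pending
// decommit state, as the purge loop does.
static bool slot_decommit_due(slot_t* slot, msecs_t now) {
  if (slot->p == NULL || slot->expire == 0) return false;  // empty slot or nothing pending
  if (now < slot->expire) return false;                    // not yet expired
  slot->expire = 0;                                        // claim the pending decommit
  return true;
}

int main(void) {
  slot_t s = { (void*)&s, 100 };            // pretend a segment was cached, due at t=100
  int early = slot_decommit_due(&s, 50);    // 0: not yet due
  int later = slot_decommit_due(&s, 150);   // 1: due, pending state claimed
  printf("%d %d\n", early, later);
  return 0;
}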
mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); + slot->decommit_mask = mi_commit_mask_empty(); } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } @@ -148,7 +152,7 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) } } -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, mi_commit_mask_t decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return false; @@ -188,11 +192,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->commit_mask = commit_mask; + slot->decommit_mask = decommit_mask; if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { long delay = mi_option_get(mi_option_segment_decommit_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); + slot->decommit_mask = mi_commit_mask_empty(); } else { mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); diff --git a/src/segment.c b/src/segment.c index 6ae3d9af..1533d281 100644 --- a/src/segment.c +++ b/src/segment.c @@ -256,7 +256,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { + if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free @@ -650,12 +650,13 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ bool is_zero = false; const bool commit_info_still_good = (segment != NULL); mi_commit_mask_t commit_mask = (segment != NULL ? segment->commit_mask : mi_commit_mask_empty()); + mi_commit_mask_t decommit_mask = (segment != NULL ? 
segment->decommit_mask : mi_commit_mask_empty()); if (segment==NULL) { // Allocate the segment from the OS bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; size_t memid = 0; - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment==NULL) { segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate @@ -691,9 +692,22 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!commit_info_still_good) { segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - segment->decommit_expire = 0; - segment->decommit_mask = mi_commit_mask_empty(); + if (segment->allow_decommit) { + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + segment->decommit_mask = decommit_mask; + mi_assert_internal(mi_commit_mask_all_set(segment->commit_mask, segment->decommit_mask)); + #if MI_DEBUG>2 + const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_assert_internal(!mi_commit_mask_any_set(segment->decommit_mask, mi_commit_mask_create(0, commit_needed))); + #endif + } + else { + mi_assert_internal(mi_commit_mask_is_empty(decommit_mask)); + segment->decommit_expire = 0; + segment->decommit_mask = mi_commit_mask_empty(); + } } + // initialize segment info segment->segment_slices = segment_slices; diff --git a/test/main-override.cpp b/test/main-override.cpp index 32011c67..37734d37 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -35,22 +35,24 @@ static void test_mt_shutdown(); static void large_alloc(void); // issue #363 static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 -static void strdup_test(); // issue #445 +static void strdup_test(); // issue #445 +static void bench_alloc_large(void); // issue #xxx int main() { mi_stats_reset(); // ignore earlier allocations - - heap_thread_free_large(); - heap_no_delete(); - heap_late_free(); - padding_shrink(); - various_tests(); - large_alloc(); - tsan_numa_test(); - strdup_test(); + + heap_thread_free_large(); + heap_no_delete(); + heap_late_free(); + padding_shrink(); + various_tests(); + large_alloc(); + tsan_numa_test(); + strdup_test(); //test_mt_shutdown(); //fail_aslr(); + //bench_alloc_large(); mi_stats_print(NULL); return 0; } @@ -246,11 +248,42 @@ static void fail_aslr() { // issues #414 static void dummy_worker() { void* p = mi_malloc(0); - mi_free(p); + mi_free(p); } static void tsan_numa_test() { auto t1 = std::thread(dummy_worker); dummy_worker(); t1.join(); -} \ No newline at end of file +} + +// issue #? 
+#include +#include +#include + +static void bench_alloc_large(void) { + static constexpr int kNumBuffers = 20; + static constexpr size_t kMinBufferSize = 5 * 1024 * 1024; + static constexpr size_t kMaxBufferSize = 25 * 1024 * 1024; + std::unique_ptr buffers[kNumBuffers]; + + std::random_device rd; + std::mt19937 gen(42); //rd()); + std::uniform_int_distribution<> size_distribution(kMinBufferSize, kMaxBufferSize); + std::uniform_int_distribution<> buf_number_distribution(0, kNumBuffers - 1); + + static constexpr int kNumIterations = 2000; + const auto start = std::chrono::steady_clock::now(); + for (int i = 0; i < kNumIterations; ++i) { + int buffer_idx = buf_number_distribution(gen); + size_t new_size = size_distribution(gen); + buffers[buffer_idx] = std::make_unique(new_size); + } + const auto end = std::chrono::steady_clock::now(); + const auto num_ms = std::chrono::duration_cast(end - start).count(); + const auto us_per_allocation = std::chrono::duration_cast(end - start).count() / kNumIterations; + std::cout << kNumIterations << " allocations Done in " << num_ms << "ms." << std::endl; + std::cout << "Avg " << us_per_allocation << " us per allocation" << std::endl; +} + From b1aff903f5622a549572bc833473ee2295b17844 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 11 Nov 2021 17:45:41 -0800 Subject: [PATCH 118/352] fix decommit bug --- include/mimalloc-internal.h | 119 +++------------------------------- include/mimalloc-types.h | 10 +-- src/options.c | 4 +- src/segment.c | 124 ++++++++++++++++++++++++++++++++++-- test/main-override.cpp | 2 +- 5 files changed, 133 insertions(+), 126 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 51a6c916..4e05c724 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -693,37 +693,14 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { - memset(cm, 0, sizeof(*cm)); + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + cm->mask[i] = 0; + } } static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { - memset(cm, 0xFF, sizeof(*cm)); -} - -static inline void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { - mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); - if (bitcount == MI_COMMIT_MASK_BITS) { - mi_assert_internal(bitidx==0); - mi_commit_mask_create_full(cm); - } - else if (bitcount == 0) { - mi_commit_mask_create_empty(cm); - } - else { - mi_commit_mask_create_empty(cm); - ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; - ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; - while (bitcount > 0) { - mi_assert_internal(i < MI_COMMIT_MASK_N); - ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; - ptrdiff_t count = (bitcount > avail ? 
avail : bitcount); - size_t mask = (((size_t)1 << count) - 1) << ofs; - cm->mask[i] = mask; - bitcount -= count; - ofs = 0; - i++; - } + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + cm->mask[i] = ~((size_t)0); } } @@ -773,70 +750,9 @@ static inline void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mas } } -static inline size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t count = 0; - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - size_t mask = cm->mask[i]; - if (~mask == 0) { - count += MI_COMMIT_MASK_FIELD_BITS; - } - else { - for (; mask != 0; mask >>= 1) { // todo: use popcount - if ((mask&1)!=0) count++; - } - } - } - // we use total since for huge segments each commit bit may represent a larger size - return (total / MI_COMMIT_MASK_BITS)* count; -} - - -static inline ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx ) { - ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; - ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; - size_t mask = 0; - // find first ones - while (i < MI_COMMIT_MASK_N) { - mask = cm->mask[i]; - mask >>= ofs; - if (mask != 0) { - while ((mask&1) == 0) { - mask >>= 1; - ofs++; - } - break; - } - i++; - ofs = 0; - } - if (i >= MI_COMMIT_MASK_N) { - // not found - *idx = MI_COMMIT_MASK_BITS; - return 0; - } - else { - // found, count ones - ptrdiff_t count = 0; - *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; - mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); - do { - do { - count++; - mask >>= 1; - } while (mask != 0); - if ((((count + ofs) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { - i++; - if (i >= MI_COMMIT_MASK_N) break; - mask = cm->mask[i]; - if ((mask&1)==0) break; - ofs = 0; - } - } while (mask != 0); - mi_assert_internal(count > 0); - return count; - } -} +void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm); +size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx); #define mi_commit_mask_foreach(cm,idx,count) \ idx = 0; \ @@ -847,25 +763,6 @@ static inline ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrd } -#define xmi_commit_mask_foreach(mask,idx,count) \ - idx = 0; \ - while (mask != 0) { \ - /* count ones */ \ - count = 0; \ - while ((mask&1)==1) { \ - mask >>= 1; \ - count++; \ - } \ - /* if found, do action */ \ - if (count > 0) { - -#define xmi_commit_mask_foreach_end() \ - } \ - idx += count; \ - /* shift out the zero */ \ - mask >>= 1; \ - idx++; \ - } // ------------------------------------------------------------------- // Fast "random" shuffle diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 1742fced..c60457c8 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -125,14 +125,14 @@ terms of the MIT license. A copy of the license can be found in the file // Derived constants -#define MI_SEGMENT_SIZE (1ULL< MI_LARGE_SIZE_MAX segment with just one huge page inside. 
} mi_segment_kind_t; -#define MI_COMMIT_SIZE (128*1024) +#define MI_COMMIT_SIZE (4*64*1024) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_N (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) diff --git a/src/options.c b/src/options.c index 5ea7a92a..925ecbf9 100644 --- a/src/options.c +++ b/src/options.c @@ -89,8 +89,8 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 10, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) - { 1000, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments + { 100, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index 582953de..93548f24 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,6 +15,111 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + if (bitcount == MI_COMMIT_MASK_BITS) { + mi_assert_internal(bitidx==0); + mi_commit_mask_create_full(cm); + } + else if (bitcount == 0) { + mi_commit_mask_create_empty(cm); + } + else { + mi_commit_mask_create_empty(cm); + ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; + ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; + while (bitcount > 0) { + mi_assert_internal(i < MI_COMMIT_MASK_N); + ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; + ptrdiff_t count = (bitcount > avail ? avail : bitcount); + size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? 
~((size_t)0) : (((size_t)1 << count) - 1) << ofs); + cm->mask[i] = mask; + bitcount -= count; + ofs = 0; + i++; + } + } +} + + +size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t count = 0; + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + size_t mask = cm->mask[i]; + if (~mask == 0) { + count += MI_COMMIT_MASK_FIELD_BITS; + } + else { + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + } + } + // we use total since for huge segments each commit bit may represent a larger size + return ((total / MI_COMMIT_MASK_BITS) * count); +} + + +ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { + ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; + ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; + size_t mask = 0; + // find first ones + while (i < MI_COMMIT_MASK_N) { + mask = cm->mask[i]; + mask >>= ofs; + if (mask != 0) { + while ((mask&1) == 0) { + mask >>= 1; + ofs++; + } + break; + } + i++; + ofs = 0; + } + if (i >= MI_COMMIT_MASK_N) { + // not found + *idx = MI_COMMIT_MASK_BITS; + return 0; + } + else { + // found, count ones + ptrdiff_t count = 0; + *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; + do { + mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); + do { + count++; + mask >>= 1; + } while ((mask&1) == 1); + if ((((*idx + count) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { + i++; + if (i >= MI_COMMIT_MASK_N) break; + mask = cm->mask[i]; + ofs = 0; + } + } while ((mask&1) == 1); + mi_assert_internal(count > 0); + return count; + } +} + +#define mi_commit_mask_foreach(cm,idx,count) \ + idx = 0; \ + while ((count = mi_commit_mask_next_run(cm,&idx)) > 0) { + +#define mi_commit_mask_foreach_end() \ + idx += count; \ + } + + /* -------------------------------------------------------------------------------- Segment allocation @@ -374,9 +479,11 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin } static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + // commit liberal, but decommit conservative - uint8_t* start; - size_t full_size; + uint8_t* start = NULL; + size_t full_size = 0; mi_commit_mask_t mask; mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; @@ -391,10 +498,14 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { mi_assert_internal((void*)start != (void*)segment); + //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); + mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); } // ok if this fails + if (segment->allow_decommit) { + _mi_os_decommit(start, full_size, stats); // ok if this fails + } mi_commit_mask_clear(&segment->commit_mask, &mask); } // increase expiration of reusing part of the delayed decommit @@ -403,7 +514,6 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } // always undo delayed decommits 
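mi_commit_mask_create, moved into segment.c by this patch, marks a run of commit chunks whose bits may straddle a field boundary in the mask array. A standalone re-implementation over two 64-bit fields (illustrative only, not the patched code itself):

#include <stdint.h>
#include <stdio.h>

#define FIELD_BITS  64
#define FIELD_COUNT 2

// Set `bitcount` bits starting at `bitidx` across an array of 64-bit fields,
// the same run-marking idea as mi_commit_mask_create.
static void mask_create(size_t bitidx, size_t bitcount, uint64_t mask[FIELD_COUNT]) {
  for (size_t i = 0; i < FIELD_COUNT; i++) mask[i] = 0;
  size_t i   = bitidx / FIELD_BITS;
  size_t ofs = bitidx % FIELD_BITS;
  while (bitcount > 0) {
    size_t avail = FIELD_BITS - ofs;
    size_t count = (bitcount > avail ? avail : bitcount);
    mask[i] |= (count >= FIELD_BITS ? ~UINT64_C(0) : ((UINT64_C(1) << count) - 1) << ofs);
    bitcount -= count;
    ofs = 0;
    i++;
  }
}

int main(void) {
  uint64_t m[FIELD_COUNT];
  mask_create(62, 4, m);   // a run of 4 chunks starting at bit 62 crosses into the next field
  printf("%016llx %016llx\n",
         (unsigned long long)m[0], (unsigned long long)m[1]);  // c000000000000000 0000000000000003
  return 0;
}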
mi_commit_mask_clear(&segment->decommit_mask, &mask); - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); return true; } @@ -420,8 +530,8 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } else { // register for future decommit in the decommit mask - uint8_t* start; - size_t full_size; + uint8_t* start = NULL; + size_t full_size = 0; mi_commit_mask_t mask; mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask); if (mi_commit_mask_is_empty(&mask) || full_size==0) return; @@ -719,7 +829,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!commit_info_still_good) { segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); + segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); if (segment->allow_decommit) { segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); segment->decommit_mask = decommit_mask; diff --git a/test/main-override.cpp b/test/main-override.cpp index 37734d37..8834f2c7 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -52,7 +52,7 @@ int main() { //test_mt_shutdown(); //fail_aslr(); - //bench_alloc_large(); + bench_alloc_large(); mi_stats_print(NULL); return 0; } From c6b82a4b37b8bdf0ccc754371492c632c3376311 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 12 Nov 2021 17:31:21 -0800 Subject: [PATCH 119/352] wip: change decommit expiration --- CMakeLists.txt | 2 +- src/options.c | 2 +- src/segment.c | 16 +++++++++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f880f74..bc4b3a51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,7 +176,7 @@ endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) + list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden $<$:-O3>) if(NOT MI_USE_CXX) list(APPEND mi_cflags -Wstrict-prototypes) endif() diff --git a/src/options.c b/src/options.c index dbd4158c..21edd97c 100644 --- a/src/options.c +++ b/src/options.c @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 100, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 50, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) diff --git a/src/segment.c b/src/segment.c index 1533d281..1ab7328e 100644 --- a/src/segment.c +++ b/src/segment.c @@ -416,7 +416,21 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ // update delayed commit mi_commit_mask_set(&segment->decommit_mask, mi_commit_mask_intersect(mask,segment->commit_mask)); // only decommit what is committed; span_free may try to decommit more - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + mi_msecs_t now = _mi_clock_now(); + if (segment->decommit_expire == 0) { + // no previous decommits, initialize now + mi_assert_internal(mi_commit_mask_is_empty(segment->decommit_mask)); + segment->decommit_expire = now + mi_option_get(mi_option_reset_delay); + } + else if (segment->decommit_expire <= now) { + // previous decommit mask already expired + // mi_segment_delayed_decommit(segment, true, stats); + segment->decommit_expire = now + 1; + } + else { + // previous decommit mask is not yet expired + // segment->decommit_expire++; + } } } From 9322123a9756ee98796f193dd30f582119e17b4c Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 12 Nov 2021 19:32:57 -0800 Subject: [PATCH 120/352] start eager commit delay at N>2 --- src/options.c | 2 +- src/segment.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 21edd97c..e01e4665 100644 --- a/src/options.c +++ b/src/options.c @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 50, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 50, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) diff --git a/src/segment.c b/src/segment.c index a37252f5..794a0541 100644 --- a/src/segment.c +++ b/src/segment.c @@ -656,7 +656,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) - const bool eager_delay = (_mi_current_thread_count() > 4 && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager_delay = (_mi_current_thread_count() > 2 && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); From f58b4d923ad5f565822a420623f4d90354458d8d Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 12 Nov 2021 19:58:49 -0800 Subject: [PATCH 121/352] comment --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 794a0541..01fbe022 100644 --- a/src/segment.c +++ b/src/segment.c @@ -425,7 +425,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + 1; + segment->decommit_expire = now + 1; // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired From 5dc4ec48fe1a6d7eb4861ab811fd01e3646317ae Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 12 Nov 2021 21:15:11 -0800 Subject: [PATCH 122/352] lower default reset delay --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index e01e4665..859b3871 100644 --- a/src/options.c +++ b/src/options.c @@ -89,7 +89,7 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 50, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) + { 25, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) From 9afc253726fbe28015b3c37841e41c9202d382ef Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 14:03:16 -0800 Subject: [PATCH 123/352] add comments, renaming --- include/mimalloc-internal.h | 50 +++++---------------------- include/mimalloc-types.h | 21 ++++++++---- src/segment.c | 68 +++++++++++++++++++++++++------------ 3 files changed, 70 insertions(+), 69 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index caf5a784..6b416b17 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -19,6 +19,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_CACHE_LINE 64 #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) @@ -696,72 +697,39 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // commit mask // ------------------------------------------------------------------- - static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = 0; } } static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = ~((size_t)0); } } static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } -static inline bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; - } - return true; -} - -static inline bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - if ((commit->mask[i] & cm->mask[i]) != 0) return true; - } - return false; -} - -static inline void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - res->mask[i] = (commit->mask[i] & cm->mask[i]); - } -} - -static inline void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - res->mask[i] &= ~(cm->mask[i]); - } -} - -static inline void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { - res->mask[i] |= cm->mask[i]; - } -} - -void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm); -size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); -ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx); +// defined in `segment.c`: +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx); #define mi_commit_mask_foreach(cm,idx,count) \ idx = 0; \ - while ((count = mi_commit_mask_next_run(cm,&idx)) > 0) { + while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { #define mi_commit_mask_foreach_end() \ idx += count; \ diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index c60457c8..5bf779fa 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -287,17 +287,26 @@ typedef enum mi_segment_kind_e { MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just 
one huge page inside. } mi_segment_kind_t; -#define MI_COMMIT_SIZE (4*64*1024) -#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) -#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS -#define MI_COMMIT_MASK_N (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) +// ------------------------------------------------------ +// A segment holds a commit mask where a bit is set if +// the corresponding MI_COMMIT_SIZE area is committed. +// The MI_COMMIT_SIZE must be a multiple of the slice +// size. We define it as equal so we can decommit on a +// slice level which helps with (real) memory fragmentation +// over time. +// ------------------------------------------------------ -#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_N * MI_COMMIT_MASK_FIELD_BITS)) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) +#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS +#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) + +#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS)) #error "the segment size must be exactly divisible by the (commit size * size_t bits)" #endif typedef struct mi_commit_mask_s { - size_t mask[MI_COMMIT_MASK_N]; + size_t mask[MI_COMMIT_MASK_FIELD_COUNT]; } mi_commit_mask_t; typedef mi_page_t mi_slice_t; diff --git a/src/segment.c b/src/segment.c index da1664b0..d953438a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,11 +15,44 @@ terms of the MIT license. A copy of the license can be found in the file static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); + // ------------------------------------------------------------------- -// commit mask +// commit mask // ------------------------------------------------------------------- -void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { +static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; + } + return true; +} + +static bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if ((commit->mask[i] & cm->mask[i]) != 0) return true; + } + return false; +} + +static void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] = (commit->mask[i] & cm->mask[i]); + } +} + +static void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] &= ~(cm->mask[i]); + } +} + +static void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] |= cm->mask[i]; + } +} + +static void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); if (bitcount == MI_COMMIT_MASK_BITS) { @@ -34,7 +67,7 @@ void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_ ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; while (bitcount > 0) { - mi_assert_internal(i < 
MI_COMMIT_MASK_N); + mi_assert_internal(i < MI_COMMIT_MASK_FIELD_COUNT); ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; ptrdiff_t count = (bitcount > avail ? avail : bitcount); size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? ~((size_t)0) : (((size_t)1 << count) - 1) << ofs); @@ -46,11 +79,10 @@ void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_ } } - -size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t count = 0; - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_N; i++) { + for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { size_t mask = cm->mask[i]; if (~mask == 0) { count += MI_COMMIT_MASK_FIELD_BITS; @@ -66,12 +98,12 @@ size_t mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { } -ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { +ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; size_t mask = 0; // find first ones - while (i < MI_COMMIT_MASK_N) { + while (i < MI_COMMIT_MASK_FIELD_COUNT) { mask = cm->mask[i]; mask >>= ofs; if (mask != 0) { @@ -84,7 +116,7 @@ ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { i++; ofs = 0; } - if (i >= MI_COMMIT_MASK_N) { + if (i >= MI_COMMIT_MASK_FIELD_COUNT) { // not found *idx = MI_COMMIT_MASK_BITS; return 0; @@ -101,7 +133,7 @@ ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { } while ((mask&1) == 1); if ((((*idx + count) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { i++; - if (i >= MI_COMMIT_MASK_N) break; + if (i >= MI_COMMIT_MASK_FIELD_COUNT) break; mask = cm->mask[i]; ofs = 0; } @@ -111,14 +143,6 @@ ptrdiff_t mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { } } -#define mi_commit_mask_foreach(cm,idx,count) \ - idx = 0; \ - while ((count = mi_commit_mask_next_run(cm,&idx)) > 0) { - -#define mi_commit_mask_foreach_end() \ - idx += count; \ - } - /* -------------------------------------------------------------------------------- Segment allocation @@ -289,7 +313,7 @@ static size_t mi_segment_info_size(mi_segment_t* segment) { static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) { ptrdiff_t idx = slice - segment->slices; - size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 
MI_MAX_ALIGN_GUARANTEE : 0); if (page_size != NULL) *page_size = psize - start_offset; @@ -362,7 +386,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = mi_commit_mask_committed_size(&segment->commit_mask, size); + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os); @@ -502,7 +526,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap + _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; mi_commit_mask_set(&segment->commit_mask, &mask); } @@ -512,7 +536,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); // ok if this fails } From 88e6b52b884ccdda4a989434fc4d57226257c9fb Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 15:25:51 -0800 Subject: [PATCH 124/352] fix types to size_t --- include/mimalloc-internal.h | 12 ++++++------ src/segment-cache.c | 6 +++--- src/segment.c | 34 +++++++++++++++++----------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cd6a4b6f..dd5c384a 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -698,34 +698,34 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // ------------------------------------------------------------------- static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = 0; } } static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = ~((size_t)0); } } static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } static inline bool 
mi_commit_mask_is_full(const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } // defined in `segment.c`: -size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); -ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx); +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); #define mi_commit_mask_foreach(cm,idx,count) \ idx = 0; \ diff --git a/src/segment-cache.c b/src/segment-cache.c index d773658d..cabdec8f 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -100,9 +100,9 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo else { // todo: one call to decommit the whole at once? mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - ptrdiff_t idx; - ptrdiff_t count; + size_t part = total/MI_COMMIT_MASK_BITS; + size_t idx; + size_t count; mi_commit_mask_foreach(cmask, idx, count) { void* start = (uint8_t*)p + (idx*part); size_t size = count*part; diff --git a/src/segment.c b/src/segment.c index c164eb30..71fd9e9c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -21,38 +21,38 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st // ------------------------------------------------------------------- static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; } return true; } static bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if ((commit->mask[i] & cm->mask[i]) != 0) return true; } return false; } static void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] = (commit->mask[i] & cm->mask[i]); } } static void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] &= ~(cm->mask[i]); } } static void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] |= cm->mask[i]; } } -static void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commit_mask_t* cm) { +static void mi_commit_mask_create(size_t bitidx, size_t bitcount, mi_commit_mask_t* cm) { mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); if (bitcount == MI_COMMIT_MASK_BITS) { @@ -64,12 +64,12 @@ static void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commi } else { mi_commit_mask_create_empty(cm); - ptrdiff_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; - ptrdiff_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; + size_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; 
+ size_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; while (bitcount > 0) { mi_assert_internal(i < MI_COMMIT_MASK_FIELD_COUNT); - ptrdiff_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; - ptrdiff_t count = (bitcount > avail ? avail : bitcount); + size_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; + size_t count = (bitcount > avail ? avail : bitcount); size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? ~((size_t)0) : (((size_t)1 << count) - 1) << ofs); cm->mask[i] = mask; bitcount -= count; @@ -82,7 +82,7 @@ static void mi_commit_mask_create(ptrdiff_t bitidx, ptrdiff_t bitcount, mi_commi size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t count = 0; - for (ptrdiff_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { size_t mask = cm->mask[i]; if (~mask == 0) { count += MI_COMMIT_MASK_FIELD_BITS; @@ -98,9 +98,9 @@ size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) } -ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { - ptrdiff_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; - ptrdiff_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) { + size_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; + size_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; size_t mask = 0; // find first ones while (i < MI_COMMIT_MASK_FIELD_COUNT) { @@ -123,7 +123,7 @@ ptrdiff_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, ptrdiff_t* idx) { } else { // found, count ones - ptrdiff_t count = 0; + size_t count = 0; *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; do { mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); @@ -602,8 +602,8 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st segment->decommit_expire = 0; mi_commit_mask_create_empty(&segment->decommit_mask); - ptrdiff_t idx; - ptrdiff_t count; + size_t idx; + size_t count; mi_commit_mask_foreach(&mask, idx, count) { // if found, decommit that sequence if (count > 0) { From f1ce9228a173b9f6307785e0a9ad0af89a705f42 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 15:29:57 -0800 Subject: [PATCH 125/352] use size_t for bitmask --- include/mimalloc-types.h | 6 +++--- src/segment.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 24cffe6d..4d703f13 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -307,9 +307,9 @@ typedef enum mi_segment_kind_e { MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. } mi_segment_kind_t; -#define MI_COMMIT_SIZE (MI_SEGMENT_SIZE/MI_INTPTR_BITS) +#define MI_COMMIT_SIZE (MI_SEGMENT_SIZE/MI_SIZE_BITS) -#if (((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE) > 8*MI_INTPTR_SIZE) +#if (((1 << MI_SEGMENT_SHIFT)/MI_COMMIT_SIZE) > MI_SIZE_BITS) #error "not enough commit bits to cover the segment size" #endif @@ -317,7 +317,7 @@ typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; -typedef uintptr_t mi_commit_mask_t; +typedef size_t mi_commit_mask_t; // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. 
Inside segments we allocated fixed size _pages_ that diff --git a/src/segment.c b/src/segment.c index af72cdf5..5516a626 100644 --- a/src/segment.c +++ b/src/segment.c @@ -336,9 +336,9 @@ static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conse if (size == 0 || size > MI_SEGMENT_SIZE) return 0; if (p >= (uint8_t*)segment + mi_segment_size(segment)) return 0; - uintptr_t diff = (p - (uint8_t*)segment); - uintptr_t start; - uintptr_t end; + size_t diff = (p - (uint8_t*)segment); + size_t start; + size_t end; if (conservative) { start = _mi_align_up(diff, MI_COMMIT_SIZE); end = _mi_align_down(diff + size, MI_COMMIT_SIZE); @@ -353,14 +353,14 @@ static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conse *full_size = (end > start ? end - start : 0); if (*full_size == 0) return 0; - uintptr_t bitidx = start / MI_COMMIT_SIZE; - mi_assert_internal(bitidx < (MI_INTPTR_SIZE*8)); + size_t bitidx = start / MI_COMMIT_SIZE; + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - uintptr_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 + size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 if (bitidx + bitcount > MI_INTPTR_SIZE*8) { _mi_warning_message("commit mask overflow: %zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); } - mi_assert_internal((bitidx + bitcount) <= (MI_INTPTR_SIZE*8)); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); return mi_commit_mask_create(bitidx, bitcount); } @@ -443,8 +443,8 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st segment->decommit_expire = 0; segment->decommit_mask = mi_commit_mask_empty(); - uintptr_t idx; - uintptr_t count; + size_t idx; + size_t count; mi_commit_mask_foreach(mask, idx, count) { // if found, decommit that sequence if (count > 0) { From b72065f04bb5928f994e6f2f21cd79328558a08f Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 15:50:26 -0800 Subject: [PATCH 126/352] move commit mask functions to segment.c --- include/mimalloc-internal.h | 53 ++--------------------------- src/segment.c | 66 ++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 55 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 993ba754..20a26dac 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -702,21 +702,6 @@ static inline mi_commit_mask_t mi_commit_mask_full(void) { return ~mi_commit_mask_empty(); } -static inline mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { - mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); - if (bitcount == MI_COMMIT_MASK_BITS) { - mi_assert_internal(bitidx==0); - return mi_commit_mask_full(); - } - else if (bitcount == 0) { - return mi_commit_mask_empty(); - } - else { - return (((uintptr_t)1 << bitcount) - 1) << bitidx; - } -} - static inline bool mi_commit_mask_is_empty(mi_commit_mask_t mask) { return (mask == 0); } @@ -725,42 +710,7 @@ static inline bool mi_commit_mask_is_full(mi_commit_mask_t mask) { return ((~mask) == 0); } -static inline bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return ((commit & mask) == mask); -} - -static inline bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return ((commit & mask) != 0); -} - -mi_decl_nodiscard static inline mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { - return (commit & mask); -} - 
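With the one-word commit mask that [PATCH 125] switches to (one bit per MI_SEGMENT_SIZE/MI_SIZE_BITS chunk), the committed size is just the bit count scaled by the per-bit chunk size. A standalone sketch of that calculation (not mimalloc code; sizes assume a 64 MiB segment and 64-bit words):

#include <stdint.h>
#include <stdio.h>

// committed bytes = chunk_size * popcount(mask), where chunk_size = total / 64
static uint64_t committed_size(uint64_t mask, uint64_t total) {
  uint64_t count = 0;
  for (; mask != 0; mask >>= 1) {   // a builtin popcount would do the same
    if (mask & 1) count++;
  }
  return (total / 64) * count;
}

int main(void) {
  const uint64_t segment_size = 64ULL * 1024 * 1024;  // 64 MiB segment -> 1 MiB per bit
  const uint64_t mask = 0xFF;                         // first 8 chunks committed
  printf("%llu MiB committed\n",
         (unsigned long long)(committed_size(mask, segment_size) / (1024 * 1024)));  // 8 MiB
  return 0;
}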
-static inline void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = (*commit) & (~mask); -} - -static inline void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { - *commit = (*commit) | mask; -} - -static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total) { - if (mi_commit_mask_is_full(mask)) { - return total; - } - else if (mi_commit_mask_is_empty(mask)) { - return 0; - } - else { - size_t count = 0; - for (; mask != 0; mask >>= 1) { // todo: use popcount - if ((mask&1)!=0) count++; - } - return (total/MI_COMMIT_MASK_BITS)*count; - } -} - +size_t _mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total); #define mi_commit_mask_foreach(mask,idx,count) \ idx = 0; \ @@ -782,6 +732,7 @@ static inline size_t mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t idx++; \ } + // ------------------------------------------------------------------- // Fast "random" shuffle // ------------------------------------------------------------------- diff --git a/src/segment.c b/src/segment.c index 5516a626..0b9502f2 100644 --- a/src/segment.c +++ b/src/segment.c @@ -23,6 +23,64 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st be reclaimed by still running threads, much like work-stealing. -------------------------------------------------------------------------------- */ +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +static mi_commit_mask_t mi_commit_mask_create(uintptr_t bitidx, uintptr_t bitcount) { + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + if (bitcount == MI_COMMIT_MASK_BITS) { + mi_assert_internal(bitidx==0); + return mi_commit_mask_full(); + } + else if (bitcount == 0) { + return mi_commit_mask_empty(); + } + else { + return (((uintptr_t)1 << bitcount) - 1) << bitidx; + } +} + + +static bool mi_commit_mask_all_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return ((commit & mask) == mask); +} + +static bool mi_commit_mask_any_set(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return ((commit & mask) != 0); +} + +mi_decl_nodiscard static mi_commit_mask_t mi_commit_mask_intersect(mi_commit_mask_t commit, mi_commit_mask_t mask) { + return (commit & mask); +} + +static void mi_commit_mask_clear(mi_commit_mask_t* commit, mi_commit_mask_t mask) { + *commit = (*commit) & (~mask); +} + +static void mi_commit_mask_set(mi_commit_mask_t* commit, mi_commit_mask_t mask) { + *commit = (*commit) | mask; +} + +size_t _mi_commit_mask_committed_size(mi_commit_mask_t mask, size_t total) { + if (mi_commit_mask_is_full(mask)) { + return total; + } + else if (mi_commit_mask_is_empty(mask)) { + return 0; + } + else { + size_t count = 0; + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + return (total/MI_COMMIT_MASK_BITS)*count; + } +} + + + /* ----------------------------------------------------------- Slices ----------------------------------------------------------- */ @@ -257,7 +315,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, segment->commit_mask, segment->decommit_mask, 
segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = mi_commit_mask_committed_size(segment->commit_mask, size); + const size_t csize = _mi_commit_mask_committed_size(segment->commit_mask, size); if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os); @@ -358,7 +416,7 @@ static mi_commit_mask_t mi_segment_commit_mask(mi_segment_t* segment, bool conse size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 if (bitidx + bitcount > MI_INTPTR_SIZE*8) { - _mi_warning_message("commit mask overflow: %zu %zu %zu %zu 0x%p %zu\n", bitidx, bitcount, start, end, p, size); + _mi_warning_message("commit mask overflow: idx=%zu count=%zu start=%zx end=%zx p=0x%p size=%zu fullsize=%zu\n", bitidx, bitcount, start, end, p, size, *full_size); } mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); @@ -375,14 +433,14 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (commit && !mi_commit_mask_all_set(segment->commit_mask, mask)) { bool is_zero = false; mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); - _mi_stat_decrease(&_mi_stats_main.committed, mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap + _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; mi_commit_mask_set(&segment->commit_mask,mask); } else if (!commit && mi_commit_mask_any_set(segment->commit_mask,mask)) { mi_assert_internal((void*)start != (void*)segment); mi_commit_mask_t cmask = mi_commit_mask_intersect(segment->commit_mask, mask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (segment->allow_decommit) { _mi_os_decommit(start, full_size, stats); } // ok if this fails mi_commit_mask_clear(&segment->commit_mask, mask); } From 12bfd18ba7b1b275ceb0a7339063c4d1f420b5da Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 16:15:03 -0800 Subject: [PATCH 127/352] fix commit mask for huge segments --- include/mimalloc-internal.h | 2 +- src/segment.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 4e90a774..cd3b0fde 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -718,7 +718,7 @@ static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - if (cm->mask[i] != 0) return false; + if (cm->mask[i] != ~((size_t)0)) return false; } return true; } diff --git a/src/segment.c b/src/segment.c index fee51e43..36a96699 100644 --- a/src/segment.c +++ b/src/segment.c @@ -463,8 +463,9 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { mi_assert_internal(_mi_ptr_segment(p) == segment); + 
mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); mi_commit_mask_create_empty(cm); - if (size == 0 || size > MI_SEGMENT_SIZE) return; + if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; const size_t segsize = mi_segment_size(segment); if (p >= (uint8_t*)segment + segsize) return; @@ -546,6 +547,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed return mi_segment_commitx(segment,true,p,size,stats); } @@ -930,6 +932,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } else { mi_assert_internal(huge_page!=NULL); + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance } From cdfbd6d08fff0d2c9409ae40ddc15c756740227b Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 17:12:21 -0800 Subject: [PATCH 128/352] decommit when abandoned segments move to the visited list --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 0b9502f2..1c46242d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1315,7 +1315,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, false, tld->stats); // decommit if needed + mi_segment_delayed_decommit(segment, true, tld->stats); // decommit if needed mi_abandoned_visited_push(segment); } } From fb5645a30d914535ebb4721f2fd549c11f91880a Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 19:41:41 -0800 Subject: [PATCH 129/352] increase decommit hysteresis --- src/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index e8d84a19..2907ddf2 100644 --- a/src/segment.c +++ b/src/segment.c @@ -541,7 +541,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); } // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, &mask); + mi_commit_mask_clear(&segment->decommit_mask, &mask); return true; } @@ -579,11 +579,11 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + 1; // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + 5; // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired - // segment->decommit_expire++; + // segment->decommit_expire += 1; // = now + mi_option_get(mi_option_reset_delay); } } } From
fa66db840d4e2c2d4747823b1fa192bc4405c793 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 19:43:52 -0800 Subject: [PATCH 130/352] increase decommit hysteresis --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 1c46242d..03c58fdd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -483,7 +483,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + 1; // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + 5; // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired From 511a8996f3568ba59ca019067173d1242a5dd786 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 13 Nov 2021 20:12:03 -0800 Subject: [PATCH 131/352] increase commit mask blocks to 2xslice size --- include/mimalloc-types.h | 7 +++---- src/segment.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index cd12418b..6b9da839 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -308,12 +308,11 @@ typedef enum mi_segment_kind_e { // A segment holds a commit mask where a bit is set if // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice -// size. We define it as equal so we can decommit on a -// slice level which helps with (real) memory fragmentation -// over time. +// size. If it is equal we have the most fine grained +// decommit but in practice 2x seems to perform better. // ------------------------------------------------------ -#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_SIZE (2*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) diff --git a/src/segment.c b/src/segment.c index 2907ddf2..30c2e22b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -583,7 +583,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } else { // previous decommit mask is not yet expired - // segment->decommit_expire += 1; // = now + mi_option_get(mi_option_reset_delay); + // segment->decommit_expire += 2; // = now + mi_option_get(mi_option_reset_delay); } } } From f039774cf5aeb7e6f536fd29d6d26e56b433c742 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14 Nov 2021 11:26:30 -0800 Subject: [PATCH 132/352] adjust decommit delay --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index c8a8b9b1..57737099 100644 --- a/src/segment.c +++ b/src/segment.c @@ -483,7 +483,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + 5; // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + (mi_option_get(mi_option_reset_delay) / 8); // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired From 32170897ddd7daf8398322659e0c2f1b99fd1547 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14
Nov 2021 11:45:28 -0800 Subject: [PATCH 133/352] make decommit size equal to slice size --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 6b9da839..772e8839 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -309,10 +309,10 @@ typedef enum mi_segment_kind_e { // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice // size. If it is equal we have the most fine grained -// decommit but in practice 2x seems to perform better. +// decommit (but in practice 2x seems to perform better). // ------------------------------------------------------ -#define MI_COMMIT_SIZE (2*MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) From 70547b5f1698358d4232258d26190952c2d5dc27 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14 Nov 2021 12:09:20 -0800 Subject: [PATCH 134/352] fix slice count --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 772e8839..068513ba 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -141,7 +141,7 @@ typedef int32_t mi_ssize_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE #define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1) #define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT) -#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 128 +#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024 #define MI_SMALL_PAGE_SIZE (MI_ZU(1)< Date: Sun, 14 Nov 2021 12:10:07 -0800 Subject: [PATCH 135/352] fix slice count comment --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 4d703f13..ce84aa97 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -141,7 +141,7 @@ typedef int32_t mi_ssize_t; #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE #define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1) #define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT) -#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 128 +#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024 #define MI_SMALL_PAGE_SIZE (MI_ZU(1)< Date: Sun, 14 Nov 2021 14:38:24 -0800 Subject: [PATCH 136/352] fix compilation on macos --- src/os.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/os.c b/src/os.c index 169680a1..3113a098 100644 --- a/src/os.c +++ b/src/os.c @@ -472,13 +472,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro } #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) { os_tag = 100; } - fd = VM_MAKE_TAG(os_tag); - #endif + #endif // huge page allocation if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static _Atomic(size_t) large_page_try_ok; // = 0; From f412df7a2b64421e1f1d61fde6055a6ea288e8f5 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 14 Nov 2021 16:51:41 -0800 Subject: [PATCH 137/352] make segment size smaller on 32-bit --- include/mimalloc-types.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 068513ba..68990626 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -128,9 +128,14 @@ typedef int32_t mi_ssize_t; // ------------------------------------------------------ // Main tuning parameters for segment and page sizes -// Sizes for 64-bit, divide by two for 32-bit -#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB +// Sizes for 64-bit (usually divide by two for 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) + +#if MI_INTPTR_SIZE > 4 #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB +#else +#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit +#endif #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB From 6efd78c5e0825b3cf502f1eacc13a8e03d4aaefe Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Nov 2021 10:52:39 -0800 Subject: [PATCH 138/352] remove O3 flag --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5a46381..8a7cc5e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,7 +178,7 @@ endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden $<$:-O3>) + list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) if(NOT MI_USE_CXX) list(APPEND mi_cflags -Wstrict-prototypes) endif() From 72a33c37ef14abc24d3a5cdbb2be806fd24cb382 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 18 Dec 2021 11:34:02 -0800 Subject: [PATCH 139/352] merge from dev --- include/mimalloc-types.h | 4 ++-- src/segment.c | 2 +- test/test-api.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9f97f8f5..957115c8 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -172,13 +172,13 @@ typedef int32_t mi_ssize_t; #endif // Maximum slice offset (15) -#define MI_MAX_SLICE_OFFSET ((MI_ALIGNED_MAX / MI_SEGMENT_SLICE_SIZE) - 1) +#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) // Used as a special value to encode block sizes in 32 bits. 
#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) // blocks up to this size are always allocated aligned -#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) +#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) diff --git a/src/segment.c b/src/segment.c index 7b2fa28e..3001f160 100644 --- a/src/segment.c +++ b/src/segment.c @@ -317,7 +317,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0); - if (page_size != NULL) *page_size = psize - start_offset; + if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } diff --git a/test/test-api.c b/test/test-api.c index 96817337..f057799a 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -163,7 +163,7 @@ int main(void) { for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { - ps[i] = mi_malloc_aligned(align/2 /*size*/, align); + ps[i] = mi_malloc_aligned(align*13 /*size*/, align); if (ps[i] == NULL || (uintptr_t)(ps[i]) % align != 0) { ok = false; } From f317225a70929fea9be62e15945c2e8890cf6a1a Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 10 Jan 2022 12:10:18 -0800 Subject: [PATCH 140/352] ignore reset_decommits option in the 2.x / dev-slice version --- include/mimalloc-internal.h | 2 +- src/options.c | 2 +- src/os.c | 15 +++++++-------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 348bef4b..45775df4 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -83,7 +83,7 @@ bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +// bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); diff --git a/src/options.c b/src/options.c index f7dbc620..b8bac750 100644 --- a/src/options.c +++ b/src/options.c @@ -72,7 +72,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else { 1, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED + { 0, UNINIT, MI_OPTION(reset_decommits) }, // legacy; ignored now and reset always uses MADV_FREE/MADV_DONTNEED (issue #518) #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages diff --git a/src/os.c b/src/os.c index 8aac3845..ac2d73d1 100644 --- a/src/os.c +++ b/src/os.c @@ -938,9 +938,12 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } +/* static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); + 
return mi_os_commitx(addr, size, true, true // conservative + , is_zero, stats); } +*/ // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -1003,14 +1006,10 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(addr, size, stats); - } - else { - return mi_os_resetx(addr, size, true, stats); - } + return mi_os_resetx(addr, size, true, stats); } +/* bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; @@ -1022,7 +1021,7 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat return mi_os_resetx(addr, size, false, stats); } } - +*/ // Protect a region in memory to be not accessible. static bool mi_os_protectx(void* addr, size_t size, bool protect) { From bd2ac3c92e3e00ec02b09081a7678478b42abf65 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Feb 2022 16:17:21 -0800 Subject: [PATCH 141/352] collect segment cache on mi_collect --- include/mimalloc-internal.h | 1 + src/heap.c | 5 ++++- src/segment-cache.c | 15 ++++++++++----- test/test-stress.c | 8 +++++--- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 45775df4..88142197 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -95,6 +95,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, // "segment-cache.c" void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); diff --git a/src/heap.c b/src/heap.c index d7975b0b..416a9a8d 100644 --- a/src/heap.c +++ b/src/heap.c @@ -147,11 +147,14 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect segment caches + // collect segment local caches if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } + // decommit in global segment caches + _mi_segment_cache_collect( collect >= MI_FORCE, &heap->tld->os); + // collect regions on program-exit (or shared library unload) if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { //_mi_mem_collect(&heap->tld->os); diff --git a/src/segment-cache.c b/src/segment-cache.c index cabdec8f..41d71e5e 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -115,13 +115,14 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo #define MI_MAX_PURGE_PER_PUSH (4) -static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) +static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) { 
MI_UNUSED(tld); mi_msecs_t now = _mi_clock_now(); - size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start size_t purged = 0; - for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots + const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); + size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); + for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); @@ -144,11 +145,15 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } - if (purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } } } +void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { + mi_segment_cache_purge(force, tld ); +} + mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE @@ -167,7 +172,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // purge expired entries - mi_segment_cache_purge(tld); + mi_segment_cache_purge(false /* force? */, tld); // find an available slot mi_bitmap_index_t bitidx; diff --git a/test/test-stress.c b/test/test-stress.c index 100c6d66..ff5fffeb 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -189,11 +189,13 @@ static void test_stress(void) { free_items(p); } } + #ifndef NDEBUG //mi_collect(false); - //mi_debug_show_arenas(); -#if !defined(NDEBUG) || defined(MI_TSAN) + //mi_debug_show_arenas(); + #endif + #if !defined(NDEBUG) || defined(MI_TSAN) if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } -#endif + #endif } } From ccfe00573172ce40c715629a8ed4691149227407 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Feb 2022 17:08:05 -0800 Subject: [PATCH 142/352] decommit in abandoned pages on mi_collect --- include/mimalloc-internal.h | 1 + src/heap.c | 3 +++ src/segment.c | 31 +++++++++++++++++++++++++++---- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 88142197..8e8b5c9c 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -110,6 +110,7 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); void _mi_abandoned_await_readers(void); +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); diff --git a/src/heap.c b/src/heap.c index 416a9a8d..b0cae474 100644 --- a/src/heap.c +++ b/src/heap.c @@ -147,6 +147,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); + // collect abandoned pages + 
_mi_abandoned_collect(heap, collect >= MI_FORCE, &heap->tld->segments); + // collect segment local caches if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); diff --git a/src/segment.c b/src/segment.c index 3001f160..980ca439 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1050,7 +1050,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) Abandonment When threads terminate, they can leave segments with -live blocks (reached through other threads). Such segments +live blocks (reachable through other threads). Such segments are "abandoned" and will be reclaimed by other threads to reuse their pages and/or free them eventually @@ -1065,11 +1065,11 @@ or decommitting segments that have a pending read operation. Note: the current implementation is one possible design; another way might be to keep track of abandoned segments -in the regions. This would have the advantage of keeping +in the arenas/segment_cache's. This would have the advantage of keeping all concurrent code in one place and not needing to deal with ABA issues. The drawback is that it is unclear how to scan abandoned segments efficiently in that case as they -would be spread among all other segments in the regions. +would be spread among all other segments in the arenas. ----------------------------------------------------------- */ // Use the bottom 20-bits (on 64-bit) of the aligned segment pointers @@ -1431,7 +1431,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true, tld->stats); // decommit if needed + mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed mi_abandoned_visited_push(segment); } } @@ -1439,6 +1439,29 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) +{ + mi_segment_t* segment; + int max_tries = (force ? 16*1024 : 1024); // limit latency + if (force) { + mi_abandoned_visited_revisit(); + } + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) + if (segment->used == 0) { + // free the segment (by forced reclaim) to make it available to other threads. 
+ // note: we could in principle optimize this by skipping reclaim and directly + // freeing but that would violate some invariants temporarily) + mi_segment_reclaim(segment, heap, 0, NULL, tld); + } + else { + // otherwise, decommit if needed and push on the visited list + mi_segment_delayed_decommit(segment, force, tld->stats); // forced decommit if needed + mi_abandoned_visited_push(segment); + } + } +} + /* ----------------------------------------------------------- Reclaim or allocate ----------------------------------------------------------- */ From 932f8661053fd5b6325f2758119757b2662c11a8 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Feb 2022 18:28:02 -0800 Subject: [PATCH 143/352] decommit segment cache on force collect --- src/segment-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 41d71e5e..9b838cef 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -126,14 +126,14 @@ static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && now >= expire) { // racy read + if (expire != 0 && (force || now >= expire)) { // racy read // seems expired, first claim it from available purged++; mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // was available, we claimed it expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && now >= expire) { // safe read + if (expire != 0 && (force || now >= expire)) { // safe read // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); From 4e65b5018fd95df4f8ddcce53144bdb7e4d59b20 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Feb 2022 19:01:41 -0800 Subject: [PATCH 144/352] clean up options --- doc/mimalloc-doc.h | 5 ++--- include/mimalloc.h | 19 ++++++++-------- src/options.c | 56 +++++++++++++++++++++++++--------------------- src/segment.c | 16 ++++++------- 4 files changed, 51 insertions(+), 45 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 4cf8c2c3..6078f415 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -811,9 +811,8 @@ typedef enum mi_option_e { mi_option_segment_cache, ///< The number of segments per thread to keep cached. mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free. mi_option_segment_reset, ///< Experimental - mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default) + mi_option_decommit_delay, ///< Delay in milli-seconds before decommitting currently unused reserved memory (25ms by default) mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes - mi_option_reset_decommits, ///< Experimental mi_option_eager_commit_delay, ///< Experimental mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory _mi_option_last @@ -1068,7 +1067,7 @@ or via environment variables. - `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages when not in use to signal to the OS that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) programs. 
By setting it to `0` no such page resets will be done which can improve performance for programs that are not long - running. As an alternative, the `MIMALLOC_RESET_DELAY=` can be set higher (100ms by default) to make the page + running. As an alternative, the `MIMALLOC_DECOMMIT_DELAY=` can be set higher (100ms by default) to make the page reset occur less frequently instead of turning it off completely. - `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs diff --git a/include/mimalloc.h b/include/mimalloc.h index 98689d28..650948ea 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -306,27 +306,28 @@ typedef enum mi_option_e { mi_option_show_errors, mi_option_show_stats, mi_option_verbose, - // the following options are experimental + // some of the following options are experimental + // (deprecated options are kept for binary backward compatibility with v1.x versions) mi_option_eager_commit, - mi_option_eager_region_commit, - mi_option_reset_decommits, - mi_option_large_os_pages, // implies eager commit + mi_option_deprecated_eager_region_commit, + mi_option_deprecated_reset_decommits, + mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_reserve_huge_os_pages_at, mi_option_reserve_os_memory, mi_option_segment_cache, mi_option_page_reset, - mi_option_abandoned_page_reset, - mi_option_segment_reset, + mi_option_abandoned_page_decommit, + mi_option_deprecated_segment_reset, mi_option_eager_commit_delay, - mi_option_allow_decommit, - mi_option_reset_delay, - mi_option_segment_decommit_delay, + mi_option_decommit_delay, mi_option_use_numa_nodes, mi_option_limit_os_alloc, mi_option_os_tag, mi_option_max_errors, mi_option_max_warnings, + mi_option_allow_decommit, + mi_option_segment_decommit_delay, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index b8bac750..388be2e6 100644 --- a/src/options.c +++ b/src/options.c @@ -49,54 +49,50 @@ typedef struct mi_option_desc_s { mi_init_t init; // is it initialized yet? (from the environment) mi_option_t option; // for debugging: the option index should match the option const char* name; // option name without `mimalloc_` prefix + const char* legacy_name; // potential legacy v1.x option name } mi_option_desc_t; -#define MI_OPTION(opt) mi_option_##opt, #opt -#define MI_OPTION_DESC(opt) {0, UNINIT, MI_OPTION(opt) } +#define MI_OPTION(opt) mi_option_##opt, #opt, NULL +#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy static mi_option_desc_t options[_mi_option_last] = { // stable options -#if MI_DEBUG || defined(MI_SHOW_ERRORS) + #if MI_DEBUG || defined(MI_SHOW_ERRORS) { 1, UNINIT, MI_OPTION(show_errors) }, -#else + #else { 0, UNINIT, MI_OPTION(show_errors) }, -#endif + #endif { 0, UNINIT, MI_OPTION(show_stats) }, { 0, UNINIT, MI_OPTION(verbose) }, - // the following options are experimental and not all combinations make sense. + // Some of the following options are experimental and not all combinations are valid. Use with care. { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
- { 0, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory - #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // legacy; ignored now and reset always uses MADV_FREE/MADV_DONTNEED (issue #518) - #endif + { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, + { 0, UNINIT, MI_OPTION(deprecated_reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates - { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) -#if defined(__NetBSD__) + { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, + #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed -#elif defined(_WIN32) + #elif defined(_WIN32) { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) -#else + #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) -#endif - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after reset_delay milli-seconds) - { 25, UNINIT, MI_OPTION(reset_delay) }, // page reset delay in milli-seconds (= decommit) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) },// decommit delay in milli-seconds for freed segments + #endif + { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output - { 16, UNINIT, MI_OPTION(max_warnings) } // maximum warnings that are output + { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) + { 500, UNINIT, MI_OPTION(segment_decommit_delay) } // decommit delay in milli-seconds for freed segments }; static void mi_option_init(mi_option_desc_t* desc); @@ -526,11 +522,21 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment + char s[64+1]; char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); mi_strlcat(buf, desc->name, sizeof(buf)); - char s[64+1]; - if (mi_getenv(buf, s, sizeof(s))) { + bool found = mi_getenv(buf,s,sizeof(s)); + if (!found && desc->legacy_name != NULL) { + mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + found = mi_getenv(buf,s,sizeof(s)); + if (found) { + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name ); + } + } + + if (found) { size_t len = strlen(s); if (len >= sizeof(buf)) len = sizeof(buf) - 1; for (size_t i = 0; i < len; i++) { diff --git a/src/segment.c b/src/segment.c index 980ca439..94c2f184 100644 --- a/src/segment.c +++ b/src/segment.c @@ -538,7 +538,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s } // increase expiration of reusing part of the delayed decommit if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); } // always undo delayed decommits mi_commit_mask_clear(&segment->decommit_mask, &mask); @@ -554,7 +554,7 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_decommit) return; - if (mi_option_get(mi_option_reset_delay) == 0) { + if (mi_option_get(mi_option_decommit_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } else { @@ -569,21 +569,21 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); - segment->decommit_expire = now + mi_option_get(mi_option_reset_delay); + segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); } else if (segment->decommit_expire <= now) { // previous decommit mask already expired // 
mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + (mi_option_get(mi_option_reset_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired - // segment->decommit_expire += 2; // = now + mi_option_get(mi_option_reset_delay); + // segment->decommit_expire += 2; // = now + mi_option_get(mi_option_decommit_delay); } } } @@ -877,7 +877,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); if (segment->allow_decommit) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_reset_delay); + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); segment->decommit_mask = decommit_mask; mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); #if MI_DEBUG>2 @@ -1245,7 +1245,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { } // perform delayed decommits - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset) /* force? */, tld->stats); + mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); From 741d39a0042b48793471f1e9e9217f9efe82efa2 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Feb 2022 14:26:56 -0800 Subject: [PATCH 145/352] fix over aggressive decommit of abandoned pages --- src/heap.c | 31 ++++++++++++++++++------------- src/segment.c | 11 ++++++----- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/heap.c b/src/heap.c index b0cae474..4fdfb0b9 100644 --- a/src/heap.c +++ b/src/heap.c @@ -115,17 +115,20 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (heap==NULL || !mi_heap_is_initialized(heap)) return; - _mi_deferred_free(heap, collect >= MI_FORCE); + + const bool force = collect >= MI_FORCE; + _mi_deferred_free(heap, force); // note: never reclaim on collect but leave it to threads that need storage to reclaim - if ( - #ifdef NDEBUG + const bool force_main = + #ifdef NDEBUG collect == MI_FORCE - #else + #else collect >= MI_FORCE - #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim) - { + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim; + + if (force_main) { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. 
_mi_abandoned_reclaim_all(heap, &heap->tld->segments); @@ -141,25 +144,27 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_heap_delayed_free(heap); // collect retired pages - _mi_heap_collect_retired(heap, collect >= MI_FORCE); + _mi_heap_collect_retired(heap, force); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect abandoned pages - _mi_abandoned_collect(heap, collect >= MI_FORCE, &heap->tld->segments); + // collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list) + // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment + _mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments); // collect segment local caches - if (collect >= MI_FORCE) { + if (force) { _mi_segment_thread_collect(&heap->tld->segments); } // decommit in global segment caches - _mi_segment_cache_collect( collect >= MI_FORCE, &heap->tld->os); + // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment + _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); // collect regions on program-exit (or shared library unload) - if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { //_mi_mem_collect(&heap->tld->os); } } diff --git a/src/segment.c b/src/segment.c index 94c2f184..037b1316 100644 --- a/src/segment.c +++ b/src/segment.c @@ -569,7 +569,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + // segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now @@ -582,8 +582,8 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ segment->decommit_expire = now + (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's } else { - // previous decommit mask is not yet expired - // segment->decommit_expire += 2; // = now + mi_option_get(mi_option_decommit_delay); + // previous decommit mask is not yet expired, increase the expiration by a bit. + segment->decommit_expire += (mi_option_get(mi_option_decommit_delay) / 8); } } } @@ -1431,7 +1431,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed + mi_segment_delayed_decommit(segment, true /* force? 
*/, tld->stats); // forced decommit if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1456,7 +1456,8 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) } else { // otherwise, decommit if needed and push on the visited list - mi_segment_delayed_decommit(segment, force, tld->stats); // forced decommit if needed + // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. + mi_segment_delayed_decommit(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } From 0e1beb0018f45ab13bdfd567f67d7deecd08084f Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 3 Feb 2022 15:51:27 -0800 Subject: [PATCH 146/352] check for decommit allowed before purging the segment cache --- src/segment-cache.c | 1 + src/segment.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 9b838cef..93908c8f 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -118,6 +118,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); + if (!mi_option_is_enabled(mi_option_allow_decommit)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); diff --git a/src/segment.c b/src/segment.c index 037b1316..e9d30510 100644 --- a/src/segment.c +++ b/src/segment.c @@ -569,8 +569,7 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); - // segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - mi_msecs_t now = _mi_clock_now(); + mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); From fb418831dfffaf9f89dce9f0793294995d839a1e Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 4 Feb 2022 16:10:51 -0800 Subject: [PATCH 147/352] only delay eager commit after the first thread --- src/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index e9d30510..9f474ca5 100644 --- a/src/segment.c +++ b/src/segment.c @@ -578,11 +578,11 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ else if (segment->decommit_expire <= now) { // previous decommit mask already expired // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's } else { // previous decommit mask is not yet expired, increase the expiration by a bit. 
- segment->decommit_expire += (mi_option_get(mi_option_decommit_delay) / 8); + segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); } } } @@ -809,7 +809,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay = (!_mi_os_has_overcommit() && // never delay on overcommit systems - _mi_current_thread_count() > 2 && // do not delay for the first N threads + _mi_current_thread_count() > 1 && // do not delay for the first N threads tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); From 0e2df71829597295c94426539d00c08414d2725b Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 4 Feb 2022 16:11:38 -0800 Subject: [PATCH 148/352] increase minimal commit size to 8*slice-size and add decommit_extend_delay as option --- include/mimalloc-types.h | 4 ++-- include/mimalloc.h | 1 + src/options.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 41364286..8cd3a4c3 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -320,10 +320,10 @@ typedef enum mi_segment_kind_e { // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice // size. If it is equal we have the most fine grained -// decommit (but in practice 2x seems to perform better). +// decommit but setting it higher can be more efficient. // ------------------------------------------------------ -#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_SIZE (8*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) diff --git a/include/mimalloc.h b/include/mimalloc.h index 06597e9a..08805845 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -328,6 +328,7 @@ typedef enum mi_option_e { mi_option_max_warnings, mi_option_allow_decommit, mi_option_segment_decommit_delay, + mi_option_decommit_extend_delay, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 388be2e6..d2e61218 100644 --- a/src/options.c +++ b/src/options.c @@ -92,7 +92,8 @@ static mi_option_desc_t options[_mi_option_last] = { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) } // decommit delay in milli-seconds for freed segments + { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments + { 2, UNINIT, MI_OPTION(decommit_extend_delay) } }; static void mi_option_init(mi_option_desc_t* desc); From e11100a13780297d7016eba0fcf541c85f60c16b Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 10:57:15 -0800 Subject: [PATCH 149/352] add minimal commit size for increased efficiency (decommit fine grained, commit coarse grained) --- include/mimalloc-types.h | 9 +++++++-- src/segment.c | 7 +++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 
8cd3a4c3..63549792 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -320,10 +320,15 @@ typedef enum mi_segment_kind_e { // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice // size. If it is equal we have the most fine grained -// decommit but setting it higher can be more efficient. +// decommit (but setting it higher can be more efficient). +// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will +// be committed in one go which can be set higher than +// MI_COMMIT_SIZE for efficiency (while the decommit mask +// is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_COMMIT_SIZE (8*MI_SEGMENT_SLICE_SIZE) +#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) diff --git a/src/segment.c b/src/segment.c index e8c80d29..0970046d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -470,17 +470,20 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin if (p >= (uint8_t*)segment + segsize) return; size_t diff = (p - (uint8_t*)segment); + mi_assert_internal(diff + size <= segsize); + size_t start; size_t end; if (conservative) { + // decommit conservative start = _mi_align_up(diff, MI_COMMIT_SIZE); end = _mi_align_down(diff + size, MI_COMMIT_SIZE); } else { + // commit liberal start = _mi_align_down(diff, MI_COMMIT_SIZE); - end = _mi_align_up(diff + size, MI_COMMIT_SIZE); + end = _mi_align_up(diff + size, MI_MINIMAL_COMMIT_SIZE); } - mi_assert_internal(end <= segsize); if (end > segsize) { end = segsize; } From 8ec83f6945133e299290354ca74f65e906c8b163 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 11:21:47 -0800 Subject: [PATCH 150/352] increase min commit to 2 mib --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 63549792..310fb92b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -327,8 +327,8 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB -#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) +#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) From 47f8caad4db06314d080b798f29b91e25dd51e76 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 17:23:28 -0800 Subject: [PATCH 151/352] improve commit chunk alignment --- src/segment.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/segment.c b/src/segment.c index 0970046d..b000e641 100644 --- a/src/segment.c +++ b/src/segment.c @@ -466,28 +466,35 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); mi_commit_mask_create_empty(cm); if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; + const size_t 
segstart = mi_segment_info_size(segment); const size_t segsize = mi_segment_size(segment); if (p >= (uint8_t*)segment + segsize) return; - size_t diff = (p - (uint8_t*)segment); - mi_assert_internal(diff + size <= segsize); + size_t pstart = (p - (uint8_t*)segment); + mi_assert_internal(pstart + size <= segsize); size_t start; size_t end; if (conservative) { // decommit conservative - start = _mi_align_up(diff, MI_COMMIT_SIZE); - end = _mi_align_down(diff + size, MI_COMMIT_SIZE); + start = _mi_align_up(pstart, MI_COMMIT_SIZE); + end = _mi_align_down(pstart + size, MI_COMMIT_SIZE); + mi_assert_internal(start >= segstart); + mi_assert_internal(end <= segsize); } else { // commit liberal - start = _mi_align_down(diff, MI_COMMIT_SIZE); - end = _mi_align_up(diff + size, MI_MINIMAL_COMMIT_SIZE); + start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); + end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); + } + if (start < segstart) { + start = segstart; } if (end > segsize) { end = segsize; } + mi_assert_internal(start <= pstart && (pstart + size) <= end); mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); *start_p = (uint8_t*)segment + start; *full_size = (end > start ? end - start : 0); @@ -504,14 +511,19 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_commit_mask_create(bitidx, bitcount, cm); } -#define MI_COMMIT_SIZE_BATCH MiB static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - //if (commit && size < MI_COMMIT_SIZE_BATCH && p + MI_COMMIT_SIZE_BATCH <= mi_segment_end(segment)) { - // size = MI_COMMIT_SIZE_BATCH; - // } + // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes. 
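Editor's note: since the liberal-commit / conservative-decommit rounding in mi_segment_commit_mask is subtle, here is a small self-contained sketch of the idea (illustrative constants and invented names, not the mimalloc code itself). A decommit request is shrunk inward to whole MI_COMMIT_SIZE chunks, a commit request is grown outward to MI_MINIMAL_COMMIT_SIZE batches, and the result is clamped so it never covers the segment info area or runs past the segment end:

#include <stdio.h>
#include <stddef.h>

#define COMMIT_SIZE          (64*1024)       /* fine-grained (de)commit chunk -- illustrative value */
#define MINIMAL_COMMIT_SIZE  (2*1024*1024)   /* coarse minimal commit batch   -- illustrative value */

static size_t align_down(size_t x, size_t a) { return (x / a) * a; }
static size_t align_up  (size_t x, size_t a) { return align_down(x + a - 1, a); }

/* compute the byte range [start,end) of a segment that is actually (de)committed
   for a request covering [offset, offset+size); `conservative` is used for decommit */
static void commit_range(size_t offset, size_t size, size_t seg_size, size_t info_size,
                         int conservative, size_t* start, size_t* end) {
  if (conservative) {   /* decommit: shrink inward so only fully covered chunks are released */
    *start = align_up(offset, COMMIT_SIZE);
    *end   = align_down(offset + size, COMMIT_SIZE);
  } else {              /* commit: expand outward so commits happen in coarse batches */
    *start = align_down(offset, MINIMAL_COMMIT_SIZE);
    *end   = align_up(offset + size, MINIMAL_COMMIT_SIZE);
  }
  if (*start < info_size) *start = info_size;  /* never touch the segment info area */
  if (*end > seg_size)    *end   = seg_size;   /* never run past the segment end */
}

int main(void) {
  size_t s, e;
  commit_range(70*1024, 100*1024, 8*1024*1024, 64*1024, 0, &s, &e);
  printf("commit   [%zu,%zu)\n", s, e);   /* expands to [64*1024, 2 MiB) after clamping */
  commit_range(70*1024, 100*1024, 8*1024*1024, 64*1024, 1, &s, &e);
  printf("decommit [%zu,%zu)\n", s, e);   /* shrinks to an empty range: nothing is decommitted */
  return 0;
}

The asymmetry is the whole point of these patches: committing too much only costs address-space bookkeeping, while decommitting a partially used chunk would throw away live data, so commit rounds outward and decommit rounds inward.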
+ /* + if (commit && size > 0) { + const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE); + if (p + csize <= mi_segment_end(segment)) { + size = csize; + } + } + */ // commit liberal, but decommit conservative uint8_t* start = NULL; size_t full_size = 0; @@ -569,13 +581,13 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit + mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); } else if (segment->decommit_expire <= now) { @@ -609,7 +621,8 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st mi_segment_commitx(segment, false, p, size, stats); } } - mi_commit_mask_foreach_end() + mi_commit_mask_foreach_end() + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); } @@ -893,6 +906,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ mi_assert_internal(mi_commit_mask_is_empty(&decommit_mask)); segment->decommit_expire = 0; mi_commit_mask_create_empty( &segment->decommit_mask ); + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); } } From f2b6938d64d555f2053612da2e84fcb128bd9116 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 17:36:14 -0800 Subject: [PATCH 152/352] fix start adjustment for the commit mask --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index b000e641..c4cf9875 100644 --- a/src/segment.c +++ b/src/segment.c @@ -487,7 +487,7 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); } - if (start < segstart) { + if (pstart >= segstart && start < segstart) { // note: the mask is also calculated for an initial commit of the info area start = segstart; } if (end > segsize) { From e87b1d2298313f2ec47da0d76dbfc195742126fc Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 10 Feb 2022 11:08:13 -0800 Subject: [PATCH 153/352] add extra huge allocation test --- test/main-override-static.c | 351 +++++++++++++++++++----------------- 1 file changed, 182 insertions(+), 169 deletions(-) diff --git a/test/main-override-static.c b/test/main-override-static.c index afb9131e..07116503 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -8,172 +8,6 @@ #include // redefines malloc etc. -#include -#include - -#define MI_INTPTR_SIZE 8 -#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) - -#define MI_BIN_HUGE 100 -//#define MI_ALIGN2W - -// Bit scan reverse: return the index of the highest bit. 
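Editor's note: the delayed-decommit bookkeeping used by mi_segment_perhaps_decommit and mi_segment_delayed_decommit above records what to decommit in a mask, stamps an expiration time, and extends that deadline by mi_option_decommit_extend_delay as more ranges arrive. The following simplified sketch uses invented names (pending_t, now_msecs, os_decommit) and approximates the timing policy; it is not the exact branch structure of segment.c:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <time.h>

typedef struct pending_s {
  uint64_t mask;    /* one bit per commit chunk scheduled for decommit */
  int64_t  expire;  /* absolute time in msecs when the pending chunks may be decommitted */
} pending_t;

static int64_t now_msecs(void) { return (int64_t)time(NULL) * 1000; }
static void os_decommit(uint64_t mask) { printf("decommitting mask 0x%llx\n", (unsigned long long)mask); }

/* schedule more chunks for decommit, delaying the actual OS call */
static void schedule_decommit(pending_t* p, uint64_t mask, int64_t delay, int64_t extend_delay) {
  p->mask |= mask;
  const int64_t now = now_msecs();
  if (p->expire == 0)        p->expire = now + delay;         /* first pending decommit */
  else if (p->expire <= now) p->expire = now + extend_delay;  /* already expired: do it soon */
  else                       p->expire += extend_delay;       /* push the deadline out a little */
}

/* called regularly, and with force=true when the segment is freed */
static void flush_decommit(pending_t* p, bool force) {
  if (p->mask == 0) return;
  if (!force && now_msecs() < p->expire) return;  /* not expired yet */
  os_decommit(p->mask);
  p->mask = 0;
  p->expire = 0;
}

int main(void) {
  pending_t p = { 0, 0 };
  schedule_decommit(&p, 0x0F, 100, 2);  /* delay plays the role of decommit_delay, 2 of extend_delay */
  flush_decommit(&p, false);            /* too early: nothing happens */
  flush_decommit(&p, true);             /* forced: decommits and clears the pending state */
  return 0;
}

Batching the work this way lets short-lived frees cancel out without a system call, while the expiration bound keeps freed memory from staying committed indefinitely.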
-static inline uint8_t mi_bsr32(uint32_t x); - -#if defined(_MSC_VER) -#include -#include -static inline uint8_t mi_bsr32(uint32_t x) { - uint32_t idx; - _BitScanReverse((DWORD*)&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline uint8_t mi_bsr32(uint32_t x) { - return (31 - __builtin_clz(x)); -} -#else -static inline uint8_t mi_bsr32(uint32_t x) { - // de Bruijn multiplication, see - static const uint8_t debruijn[32] = { - 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, - 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, - }; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return debruijn[(x*0x076be629) >> 27]; -} -#endif - -/* -// Bit scan reverse: return the index of the highest bit. -uint8_t _mi_bsr(uintptr_t x) { - if (x == 0) return 0; - #if MI_INTPTR_SIZE==8 - uint32_t hi = (x >> 32); - return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); - #elif MI_INTPTR_SIZE==4 - return mi_bsr32(x); - #else - # error "define bsr for non-32 or 64-bit platforms" - #endif -} -*/ - - -static inline size_t _mi_wsize_from_size(size_t size) { - return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); -} - -// Return the bin for a given field size. -// Returns MI_BIN_HUGE if the size is too large. -// We use `wsize` for the size in "machine word sizes", -// i.e. byte size == `wsize*sizeof(void*)`. -extern inline uint8_t _mi_bin8(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - #if defined(MI_ALIGN4W) - if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes - #endif - wsize--; - // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); - // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
- // - adjust with 3 because we use do not round the first 8 sizes - // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - } - return bin; -} - -extern inline uint8_t _mi_bin4(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - uint8_t b = mi_bsr32((uint32_t)wsize); - bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; - } - return bin; -} - -size_t _mi_binx4(size_t bsize) { - if (bsize==0) return 0; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 1) return bsize; - size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); - return bin; -} - -size_t _mi_binx8(size_t bsize) { - if (bsize<=1) return bsize; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 2) return bsize; - size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; - return bin; -} - -void mi_bins() { - //printf(" QNULL(1), /* 0 */ \\\n "); - size_t last_bin = 0; - size_t min_bsize = 0; - size_t last_bsize = 0; - for (size_t bsize = 1; bsize < 2*1024; bsize++) { - size_t size = bsize * 64 * 1024; - size_t bin = _mi_binx8(bsize); - if (bin != last_bin) { - printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); - //printf("QNULL(%6zd), ", wsize); - //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); - last_bin = bin; - min_bsize = bsize; - } - last_bsize = bsize; - } -} - static void double_free1(); static void double_free2(); static void corrupt_free(); @@ -183,7 +17,7 @@ static void test_aslr(void); static void test_process_info(void); static void test_reserved(void); static void negative_stat(void); - +static void alloc_huge(void); int main() { mi_version(); @@ -197,6 +31,7 @@ int main() { // invalid_free(); // test_reserved(); // negative_stat(); + alloc_huge(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -210,7 +45,7 @@ int main() { free(p1); free(p2); free(s); - + /* now test if override worked by allocating/freeing across the api's*/ //p1 = mi_malloc(32); //free(p1); @@ -347,4 +182,182 @@ static void negative_stat(void) { *p = 100; mi_free(p); mi_stats_print_out(NULL, NULL); -} \ No newline at end of file +} + +static void alloc_huge(void) { + void* p = mi_malloc(67108872); + mi_free(p); +} + + +// ---------------------------- +// bin size experiments +// ------------------------------ + +#if 0 +#include +#include + +#define MI_INTPTR_SIZE 8 +#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) + +#define MI_BIN_HUGE 100 +//#define MI_ALIGN2W + +// Bit scan reverse: return the index of the highest bit. 
+static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +/* +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; + #if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); + #elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); + #else + # error "define bsr for non-32 or 64-bit platforms" + #endif +} +*/ + + +static inline size_t _mi_wsize_from_size(size_t size) { + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +extern inline uint8_t _mi_bin8(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { +#if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes +#endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
+ // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + +static inline uint8_t _mi_bin4(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + } + return bin; +} + +static size_t _mi_binx4(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 1) return bsize; + size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); + return bin; +} + +static size_t _mi_binx8(size_t bsize) { + if (bsize<=1) return bsize; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 2) return bsize; + size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; + return bin; +} + +static void mi_bins(void) { + //printf(" QNULL(1), /* 0 */ \\\n "); + size_t last_bin = 0; + size_t min_bsize = 0; + size_t last_bsize = 0; + for (size_t bsize = 1; bsize < 2*1024; bsize++) { + size_t size = bsize * 64 * 1024; + size_t bin = _mi_binx8(bsize); + if (bin != last_bin) { + printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); + //printf("QNULL(%6zd), ", wsize); + //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + last_bin = bin; + min_bsize = bsize; + } + last_bsize = bsize; + } +} +#endif \ No newline at end of file From 96008c55d0add668dbb09d135f6ca18a2f6a322e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 10 Feb 2022 11:57:30 -0800 Subject: [PATCH 154/352] fix ubsan warning on huge allocations (issue #543) --- src/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index c4cf9875..8d3eebe5 100644 --- a/src/segment.c +++ b/src/segment.c @@ -762,7 +762,8 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i } // and also for the last one (if not set already) (the last one is needed for coalescing) - mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) + mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; if (last < mi_segment_slices_end(segment) && last >= slice) { last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); last->slice_count = 0; From b89b4fd18a103eda7397cc7000b5ee5eda2f3cd8 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 14 Feb 2022 16:44:33 -0800 Subject: [PATCH 155/352] fix v2.0.5 version --- cmake/mimalloc-config-version.cmake | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 76b2af6c..acbd0f70 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,12 +1,6 @@ -<<<<<<< HEAD set(mi_version_major 2) set(mi_version_minor 0) -set(mi_version_patch 4) -======= -set(mi_version_major 1) -set(mi_version_minor 7) set(mi_version_patch 5) ->>>>>>> dev set(mi_version 
${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) From 9f6cbc50eeb20a227fe1def30cb68be8e84b1c32 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 7 Apr 2022 09:48:08 -0700 Subject: [PATCH 156/352] use heap_stat_decrease when possible --- src/page.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index 94fc707d..abc73685 100644 --- a/src/page.c +++ b/src/page.c @@ -371,12 +371,12 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { const size_t bsize = mi_page_block_size(page); if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.large, bsize); + mi_heap_stat_decrease(heap, large, bsize); } else { // not strictly necessary as we never get here for a huge page mi_assert_internal(false); - _mi_stat_decrease(&heap->tld->stats.huge, bsize); + mi_heap_stat_decrease(heap, huge, bsize); } } From 332346b685808db68b97e0870cbdc82c1ba6e76d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 7 Apr 2022 10:38:31 -0700 Subject: [PATCH 157/352] remove unneeded MI_HUGE_OBJ_SIZE_MAX --- include/mimalloc-types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 310fb92b..0456884b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -156,7 +156,6 @@ typedef int32_t mi_ssize_t; #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) @@ -175,7 +174,7 @@ typedef int32_t mi_ssize_t; #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) // Used as a special value to encode block sizes in 32 bits. 
-#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) +#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) // blocks up to this size are always allocated aligned #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) From 0cda8b02d5bdaa6d23c8862729cce624d5f07964 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 7 Apr 2022 11:08:54 -0700 Subject: [PATCH 158/352] fix stats for large objects that were off by the block size padding --- src/alloc.c | 24 +++++++++++++++--------- src/page.c | 13 +------------ 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 8cf72429..58115daa 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -45,7 +45,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz #if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, normal, bsize); mi_heap_stat_counter_increase(heap, normal_count, 1); #if (MI_STAT>1) @@ -297,20 +297,26 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) + #if (MI_STAT < 2) MI_UNUSED(block); -#endif + #endif mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) + const size_t bsize = mi_page_usable_block_size(page); + #if (MI_STAT>1) const size_t usize = mi_page_usable_size_of(page, block); mi_heap_stat_decrease(heap, malloc, usize); -#endif - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + #endif + if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, normal, bsize); -#if (MI_STAT > 1) + #if (MI_STAT > 1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); -#endif + #endif + } + else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, large, bsize); + } + else { + mi_heap_stat_decrease(heap, huge, bsize); } } #else diff --git a/src/page.c b/src/page.c index abc73685..1849dc8f 100644 --- a/src/page.c +++ b/src/page.c @@ -368,17 +368,6 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_page_set_has_aligned(page, false); mi_heap_t* heap = mi_page_heap(page); - const size_t bsize = mi_page_block_size(page); - if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, large, bsize); - } - else { - // not strictly necessary as we never get here for a huge page - mi_assert_internal(false); - mi_heap_stat_decrease(heap, huge, bsize); - } - } // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) @@ -791,7 +780,7 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = (is_huge ? 
NULL : mi_page_queue(heap, block_size)); mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); if (page != NULL) { - const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding + const size_t bsize = mi_page_usable_block_size(page); // note: includes padding mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(bsize >= size); From ea0f5b8779e905736a6e10e4c5e14af3d9590d9d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 8 Apr 2022 14:52:15 -0700 Subject: [PATCH 159/352] use new MI_ATOMIC_VAR_INIT --- src/segment-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 93908c8f..aacdbc11 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_CACHE_FIELDS (16) #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit -#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX) +#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) #define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes typedef struct mi_cache_slot_s { From dd929659ab4329ed3d42c423e692ab418cff1856 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 14 Apr 2022 11:28:40 -0700 Subject: [PATCH 160/352] fix wrong assertion --- src/page.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/page.c b/src/page.c index 1849dc8f..fd6c5397 100644 --- a/src/page.c +++ b/src/page.c @@ -780,10 +780,8 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size)); mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); if (page != NULL) { - const size_t bsize = mi_page_usable_block_size(page); // note: includes padding mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(bsize >= size); - + if (pq == NULL) { // huge pages are directly abandoned mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); @@ -794,6 +792,8 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { else { mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); } + + const size_t bsize = mi_page_usable_block_size(page); // note: not `mi_page_block_size` to account for padding if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, large, bsize); mi_heap_stat_counter_increase(heap, large_count, 1); From f9416ce71c3f0cf7a907cf6426c53e5004f1cca1 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 14 Apr 2022 16:09:12 -0700 Subject: [PATCH 161/352] merge from dev --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 80feacf3..ecd28dbb 100644 --- a/src/options.c +++ b/src/options.c @@ -91,10 +91,10 @@ static mi_option_desc_t options[_mi_option_last] = { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. 
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments { 2, UNINIT, MI_OPTION(decommit_extend_delay) }, - { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. }; static void mi_option_init(mi_option_desc_t* desc); From f819dbb4e4813fab464aee16770f39f11476bfea Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 14 Apr 2022 16:12:02 -0700 Subject: [PATCH 162/352] fix trailing comma --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index ecd28dbb..b07e0e77 100644 --- a/src/options.c +++ b/src/options.c @@ -94,7 +94,7 @@ static mi_option_desc_t options[_mi_option_last] = { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 2, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 2, UNINIT, MI_OPTION(decommit_extend_delay) } }; static void mi_option_init(mi_option_desc_t* desc); From a949c9321cce0dc94a59a3f0860ca496d014912e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 19 Apr 2022 11:17:53 -0700 Subject: [PATCH 163/352] update vs2022 solution --- ide/vs2022/mimalloc-override-test.vcxproj | 8 ++++---- ide/vs2022/mimalloc-override.vcxproj | 10 +++++----- ide/vs2022/mimalloc-test-api.vcxproj | 10 +++++----- ide/vs2022/mimalloc-test-stress.vcxproj | 8 ++++---- ide/vs2022/mimalloc-test.vcxproj | 8 ++++---- ide/vs2022/mimalloc.vcxproj | 10 +++++----- 6 files changed, 27 insertions(+), 27 deletions(-) diff --git a/ide/vs2022/mimalloc-override-test.vcxproj b/ide/vs2022/mimalloc-override-test.vcxproj index a3c56f7b..7a9202f1 100644 --- a/ide/vs2022/mimalloc-override-test.vcxproj +++ b/ide/vs2022/mimalloc-override-test.vcxproj @@ -29,23 +29,23 @@ Application true - v143 + v142 Application false - v143 + v142 true Application true - v143 + v142 Application false - v143 + v142 true diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index d50c4e6a..4136e574 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -29,22 +29,22 @@ DynamicLibrary true - v143 + v142 DynamicLibrary false - v143 + v142 DynamicLibrary true - v143 + v142 DynamicLibrary false - v143 + v142 @@ -236,7 +236,6 @@ - @@ -247,6 +246,7 @@ + diff --git a/ide/vs2022/mimalloc-test-api.vcxproj b/ide/vs2022/mimalloc-test-api.vcxproj index 6023c251..812a9cb1 100644 --- a/ide/vs2022/mimalloc-test-api.vcxproj +++ b/ide/vs2022/mimalloc-test-api.vcxproj @@ -29,23 +29,23 @@ Application true - v143 + v142 Application false - v143 + v142 true Application true - v143 + v142 Application false - v143 + v142 true @@ -152,4 +152,4 @@ - \ No newline at end of file + diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index c7e820df..ef7ab357 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -29,23 +29,23 @@ Application true - v143 + v142 Application false - v143 + v142 true Application true - v143 + v142 Application false - v143 + v142 true diff --git a/ide/vs2022/mimalloc-test.vcxproj 
b/ide/vs2022/mimalloc-test.vcxproj index 506dd7d4..13af6ab4 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -29,23 +29,23 @@ Application true - v143 + v142 Application false - v143 + v142 true Application true - v143 + v142 Application false - v143 + v142 true diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 125d4050..9f967d94 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -29,23 +29,23 @@ StaticLibrary true - v143 + v142 StaticLibrary false - v143 + v142 true StaticLibrary true - v143 + v142 StaticLibrary false - v143 + v142 true @@ -225,7 +225,6 @@ - true @@ -235,6 +234,7 @@ + From a90b98a1441b5cc75e4b0f1f7ec93157cdb1ce2e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 19 Apr 2022 19:57:57 -0700 Subject: [PATCH 164/352] update to vs2022 --- ide/vs2022/mimalloc-override-test.vcxproj | 8 ++++---- ide/vs2022/mimalloc-override.vcxproj | 8 ++++---- ide/vs2022/mimalloc-test-api.vcxproj | 10 +++++----- ide/vs2022/mimalloc-test-stress.vcxproj | 8 ++++---- ide/vs2022/mimalloc-test.vcxproj | 8 ++++---- ide/vs2022/mimalloc.vcxproj | 8 ++++---- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/ide/vs2022/mimalloc-override-test.vcxproj b/ide/vs2022/mimalloc-override-test.vcxproj index 7a9202f1..a3c56f7b 100644 --- a/ide/vs2022/mimalloc-override-test.vcxproj +++ b/ide/vs2022/mimalloc-override-test.vcxproj @@ -29,23 +29,23 @@ Application true - v142 + v143 Application false - v142 + v143 true Application true - v142 + v143 Application false - v142 + v143 true diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 4136e574..f10376c7 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -29,22 +29,22 @@ DynamicLibrary true - v142 + v143 DynamicLibrary false - v142 + v143 DynamicLibrary true - v142 + v143 DynamicLibrary false - v142 + v143 diff --git a/ide/vs2022/mimalloc-test-api.vcxproj b/ide/vs2022/mimalloc-test-api.vcxproj index 812a9cb1..6023c251 100644 --- a/ide/vs2022/mimalloc-test-api.vcxproj +++ b/ide/vs2022/mimalloc-test-api.vcxproj @@ -29,23 +29,23 @@ Application true - v142 + v143 Application false - v142 + v143 true Application true - v142 + v143 Application false - v142 + v143 true @@ -152,4 +152,4 @@ - + \ No newline at end of file diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index ef7ab357..c7e820df 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -29,23 +29,23 @@ Application true - v142 + v143 Application false - v142 + v143 true Application true - v142 + v143 Application false - v142 + v143 true diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj index 13af6ab4..506dd7d4 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -29,23 +29,23 @@ Application true - v142 + v143 Application false - v142 + v143 true Application true - v142 + v143 Application false - v142 + v143 true diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index f29e0977..0a45006c 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -29,23 +29,23 @@ StaticLibrary true - v142 + v143 StaticLibrary false - v142 + v143 true StaticLibrary true - v142 + v143 StaticLibrary false - v142 + v143 true From 83d84b8703644a22839f956dbf4380fa4f272d48 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Apr 2022 09:54:24 -0700 
Subject: [PATCH 165/352] increase max alignment limit to 16MiB (issue #576) --- include/mimalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index c752ac24..c776efeb 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -166,7 +166,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. // ------------------------------------------------------------------------------------- -#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment is 1MiB +#define MI_ALIGNMENT_MAX (16*1024*1024UL) // maximum supported alignment is 16MiB mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); From c128cf69bee927a741b9293ad4298342a7110278 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 30 Oct 2022 19:47:54 -0700 Subject: [PATCH 166/352] fix alignment_max for 32-bit systems (unfortunately, we need to include stdint.h now) --- include/mimalloc.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/mimalloc.h b/include/mimalloc.h index c776efeb..eacf3977 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -95,6 +95,7 @@ terms of the MIT license. A copy of the license can be found in the file #include // size_t #include // bool +#include // INTPTR_MAX #ifdef __cplusplus extern "C" { @@ -166,7 +167,11 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
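Editor's note: because the argument order noted in this comment is a common source of confusion, a brief usage sketch of the three interfaces it mentions may help (mimalloc puts the alignment after the size; the libc interfaces put it first). The POSIX and C11 calls are shown only for contrast and are not available on every platform; the 16MiB limit is the 64-bit MI_ALIGNMENT_MAX from this patch, lowered to 1MiB on 32-bit targets by the follow-up patch:

#include <stdlib.h>
#include <stdbool.h>
#include <mimalloc.h>

int main(void) {
  void* a = mi_malloc_aligned(1000, 4096);   /* mimalloc: size first, then alignment */
  void* b = aligned_alloc(4096, 4096);       /* C11: alignment first (size a multiple of it) */
  void* c = NULL;
  if (posix_memalign(&c, 4096, 1000) != 0) c = NULL;  /* POSIX: out-pointer, alignment, size */
  /* alignments above MI_ALIGNMENT_MAX (16MiB on 64-bit, 1MiB on 32-bit) are not supported */
  mi_free(a);
  free(b);
  free(c);
  return 0;
}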
// ------------------------------------------------------------------------------------- +#if (INTPTR_MAX > INT32_MAX) #define MI_ALIGNMENT_MAX (16*1024*1024UL) // maximum supported alignment is 16MiB +#else +#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment for 32-bit systems is 1MiB +#endif mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); From 9f36808a7f898d20cbabe3c360df9e5732d620cf Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 1 Nov 2022 16:22:51 -0700 Subject: [PATCH 167/352] initial api for heaps restricted to a certain arena --- include/mimalloc-internal.h | 4 ++- include/mimalloc-types.h | 1 + include/mimalloc.h | 2 +- src/arena.c | 16 +++++----- src/bitmap.c | 19 ++++++++++++ src/bitmap.h | 4 +++ src/heap.c | 11 ++++++- src/init.c | 2 ++ src/segment-cache.c | 14 +++++++-- src/segment.c | 58 +++++++++++++++++++++---------------- test/main-override-static.c | 18 +++++++++++- 11 files changed, 109 insertions(+), 40 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index a4d0a4a9..550b6543 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -93,9 +93,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); mi_arena_id_t _mi_arena_id_none(void); +bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); @@ -142,6 +143,7 @@ uint8_t _mi_bin(size_t size); // for stats void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b55d57fa..800d9413 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -438,6 +438,7 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") _Atomic(mi_block_t*) thread_delayed_free; mi_threadid_t thread_id; // thread this heap belongs too + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two 
random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation diff --git a/include/mimalloc.h b/include/mimalloc.h index db0b06c3..3a0c790e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -287,7 +287,7 @@ mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; #if MI_MALLOC_VERSION >= 200 -mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id, bool exclusive); +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif // deprecated diff --git a/src/arena.c b/src/arena.c index fbbb0205..8de84001 100644 --- a/src/arena.c +++ b/src/arena.c @@ -97,8 +97,9 @@ mi_arena_id_t _mi_arena_id_none(void) { return 0; } -static bool mi_arena_id_suitable(mi_arena_id_t arena_id, bool exclusive, mi_arena_id_t req_arena_id) { - return (!exclusive || arena_id == req_arena_id); +static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { + return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || + (arena_id == req_arena_id)); } @@ -117,18 +118,16 @@ static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_ } static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - mi_assert_internal(arena_memid != MI_MEMID_OS); *bitmap_index = (arena_memid >> 8); mi_arena_id_t id = (int)(arena_memid & 0x7F); *arena_index = mi_arena_id_index(id); return ((arena_memid & 0x80) != 0); } -bool _mi_arena_memid_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { - mi_assert_internal(arena_memid != MI_MEMID_OS); +bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { mi_arena_id_t id = (int)(arena_memid & 0x7F); bool exclusive = ((arena_memid & 0x80) != 0); - return mi_arena_id_suitable(id, exclusive, request_arena_id); + return mi_arena_id_is_suitable(id, exclusive, request_arena_id); } static size_t mi_block_count_of_size(size_t size) { @@ -159,7 +158,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren { MI_UNUSED(arena_index); mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); - if (!mi_arena_id_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; @@ -266,7 +265,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* } // finally, fall back to the OS - if (mi_option_is_enabled(mi_option_limit_os_alloc)) { + if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } @@ -282,6 +281,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, b return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); } + /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ diff --git a/src/bitmap.c b/src/bitmap.c index 4e85d687..4fc7a1f3 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -108,6 
+108,25 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel return false; } +// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled +bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, + const size_t start_field_idx, const size_t count, + mi_bitmap_pred_fun_t pred_fun, void* pred_arg, + mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + if (pred_fun == NULL || pred_fun(*bitmap_idx, pred_arg)) { + return true; + } + // predicate returned false, unclaim and look further + _mi_bitmap_unclaim(bitmap, bitmap_fields, count, *bitmap_idx); + } + } + return false; +} + /* // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. diff --git a/src/bitmap.h b/src/bitmap.h index 7bd3106c..0c501ec1 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -72,6 +72,10 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); +// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled +typedef bool (mi_cdecl *mi_bitmap_pred_fun_t)(mi_bitmap_index_t bitmap_idx, void* pred_arg); +bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx); + // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); diff --git a/src/heap.c b/src/heap.c index bc103a01..15ca3603 100644 --- a/src/heap.c +++ b/src/heap.c @@ -200,13 +200,14 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
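Editor's note: the new _mi_bitmap_try_find_from_claim_pred follows a claim-then-verify pattern: a range is first claimed atomically, the predicate is evaluated while the range is held (here: does the cached slot's memid suit the requesting arena?), and on rejection the range is unclaimed so the search can continue. A minimal sketch of that pattern over single slots, with invented names (slot_claim, slot_release, suitable_fn) rather than the real bitmap API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

typedef bool (*suitable_fn)(size_t slot, void* arg);

/* one flag per slot: false = free, true = claimed */
static bool slot_claim(atomic_bool* slots, size_t i) {
  bool expected = false;
  return atomic_compare_exchange_strong(&slots[i], &expected, true);
}
static void slot_release(atomic_bool* slots, size_t i) {
  atomic_store(&slots[i], false);
}

/* find and claim a slot that also satisfies `pred`; returns its index or SIZE_MAX */
static size_t claim_suitable(atomic_bool* slots, size_t n, suitable_fn pred, void* arg) {
  for (size_t i = 0; i < n; i++) {
    if (!slot_claim(slots, i)) continue;          /* already taken by another thread */
    if (pred == NULL || pred(i, arg)) return i;   /* claimed and suitable: keep it */
    slot_release(slots, i);                       /* unsuitable: give it back, keep looking */
  }
  return SIZE_MAX;
}

int main(void) {
  atomic_bool slots[4] = { false, false, false, false };
  return (claim_suitable(slots, 4, NULL, NULL) == 0) ? 0 : 1;
}

Running the predicate while the slot is still held is what makes the result safe to use: a suitable slot cannot be handed to another thread between the check and the claim.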
if (heap==NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); + heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); @@ -218,6 +219,14 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { return heap; } +mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { + return mi_heap_new_in_arena(_mi_arena_id_none()); +} + +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { + return _mi_arena_memid_is_suitable(memid, heap->arena_id); +} + uintptr_t _mi_heap_random_next(mi_heap_t* heap) { return _mi_random_next(&heap->random); } diff --git a/src/init.c b/src/init.c index 66e71ef2..4f37b717 100644 --- a/src/init.c +++ b/src/init.c @@ -109,6 +109,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie + 0, // arena id { 0, 0 }, // keys { {0}, {0}, 0 }, 0, // page count @@ -149,6 +150,7 @@ mi_heap_t _mi_heap_main = { MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie + 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0x846ca68b}, {0}, 0 }, // random 0, // page count diff --git a/src/segment-cache.c b/src/segment-cache.c index eac8f843..da726716 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -39,8 +39,13 @@ static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free +static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { + mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); +} -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return NULL; @@ -60,12 +65,15 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm // find an available slot mi_bitmap_index_t bitidx = 0; bool claimed = false; + mi_arena_id_t req_arena_id = _req_arena_id; + mi_bitmap_pred_fun_t pred_fun = &mi_segment_cache_is_suitable; // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? + if (*large) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = true; } if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = false; } diff --git a/src/segment.c b/src/segment.c index 46bba9d7..2ae591fd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -721,7 +721,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i return page; } -static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segments_tld_t* tld) { +static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld); @@ -730,19 +730,23 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) { if (slice->slice_count >= slice_count) { // found one - mi_span_queue_delete(sq, slice); mi_segment_t* segment = _mi_ptr_segment(slice); - if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, tld); + if (_mi_arena_memid_is_suitable(segment->memid, req_arena_id)) { + // found a suitable page span + mi_span_queue_delete(sq, slice); + + if (slice->slice_count > slice_count) { + mi_segment_slice_split(segment, slice, slice_count, tld); + } + mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); + mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); + if (page == NULL) { + // commit failed; return NULL but first restore the slice + mi_segment_span_free_coalesce(slice, tld); + return NULL; + } + return page; } - mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); - mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); - if (page == NULL) { - // commit failed; return NULL but first restore the slice - mi_segment_span_free_coalesce(slice, tld); - return NULL; - } - return page; } } sq++; @@ -757,7 +761,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
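Editor's note: to make the suitability checks threaded through this patch easier to follow, here is a compact model of the arena memid and of the rule in mi_arena_id_is_suitable / _mi_arena_memid_is_suitable. The field layout (arena id in the low 7 bits, an exclusive flag in bit 7, the bitmap index above that) is what mi_arena_memid_indices decodes in the arena.c hunk above; everything else is a simplified sketch with invented names:

#include <stdbool.h>
#include <stddef.h>

typedef int arena_id_t;   /* 0 plays the role of _mi_arena_id_none() */

/* pack and unpack the fields of an arena memid (model of mi_arena_memid_create/_indices) */
static size_t memid_create(arena_id_t id, bool exclusive, size_t bitmap_idx) {
  return (bitmap_idx << 8) | ((size_t)id & 0x7F) | (exclusive ? 0x80 : 0);
}
static arena_id_t memid_arena(size_t memid)     { return (arena_id_t)(memid & 0x7F); }
static bool       memid_exclusive(size_t memid) { return (memid & 0x80) != 0; }

/* memory is usable by a heap if it comes from a non-exclusive arena and the heap has no
   arena restriction, or if it comes from exactly the arena the heap was created for */
static bool memid_is_suitable(size_t memid, arena_id_t req_arena_id) {
  const arena_id_t id = memid_arena(memid);
  const bool exclusive = memid_exclusive(memid);
  return ((!exclusive && req_arena_id == 0) || (id == req_arena_id));
}

int main(void) {
  size_t m = memid_create(3, true, 42);
  return (memid_is_suitable(m, 3) && !memid_is_suitable(m, 0)) ? 0 : 1;
}

Every reuse path touched in this patch (the page-span search, the segment-cache pop, abandoned-segment reclaim) applies this check via _mi_arena_memid_is_suitable(segment->memid, heap->arena_id), and the plain OS fallback is skipped when a specific arena is requested, so an arena-bound heap only ever returns memory from its own arena.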
-static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL)); mi_assert_internal((segment==NULL) || (segment!=NULL && required==0)); @@ -793,9 +797,9 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; size_t memid = 0; - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment==NULL) { - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, os_tld); + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (commit) { mi_commit_mask_create_full(&commit_mask); @@ -908,8 +912,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { - return mi_segment_init(NULL, required, tld, os_tld, huge_page); +static mi_segment_t* mi_segment_alloc(size_t required, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { + return mi_segment_init(NULL, required, req_arena_id, tld, os_tld, huge_page); } @@ -1368,6 +1372,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; + // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments + // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? + bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees) if (segment->used == 0) { // free the segment (by forced reclaim) to make it available to other threads. @@ -1377,13 +1384,13 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (has_page) { + else if (has_page && is_suitable) { // found a large enough free span, or a page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). 
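Editor's note: the reclaim policy in this loop can be read as a small decision function, sketched below with invented names (the real code also performs the actual freeing and requeueing): an empty segment is always reclaimed so its memory is freed, a suitable segment with a usable page is reclaimed into the current heap, a suitable segment that has been visited several times is reclaimed anyway to bound the abandoned queue, and everything else is pushed back for other heaps, including, as the todo above notes, segments that are unsuitable for an arena-restricted heap:

#include <stdbool.h>

typedef enum { RECLAIM_FREE, RECLAIM_USE, REQUEUE } reclaim_action_t;

/* simplified decision applied to each abandoned segment that is popped (sketch only) */
static reclaim_action_t reclaim_decision(bool segment_is_empty, bool has_free_page,
                                         bool is_suitable, int abandoned_visits) {
  if (segment_is_empty)                    return RECLAIM_FREE;  /* reclaim just to free it */
  if (has_free_page && is_suitable)        return RECLAIM_USE;   /* reclaim into this heap */
  if (abandoned_visits > 3 && is_suitable) return RECLAIM_USE;   /* bound the queue length */
  return REQUEUE;                                                /* leave it for other heaps */
}

int main(void) {
  return (reclaim_decision(false, true, true, 0) == RECLAIM_USE) ? 0 : 1;
}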
return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } - else if (segment->abandoned_visits > 3) { + else if (segment->abandoned_visits > 3 && is_suitable) { // always reclaim on 3rd visit to limit the abandoned queue length. mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1425,7 +1432,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) Reclaim or allocate ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); @@ -1443,7 +1450,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_ return segment; } // 2. otherwise allocate a fresh segment - return mi_segment_alloc(0, tld, os_tld, NULL); + return mi_segment_alloc(0, heap->arena_id, tld, os_tld, NULL); } @@ -1459,7 +1466,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); - mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, heap->arena_id, tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) { @@ -1483,10 +1490,10 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki Huge page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page = NULL; - mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); + mi_segment_t* segment = mi_segment_alloc(size,req_arena_id,tld,os_tld,&page); if (segment == NULL || page==NULL) return NULL; mi_assert_internal(segment->used==1); mi_assert_internal(mi_page_block_size(page) >= size); @@ -1536,8 +1543,9 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + page = mi_segment_huge_page_alloc(block_size,heap->arena_id,tld,os_tld); } + mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid)); mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); return page; } diff --git a/test/main-override-static.c b/test/main-override-static.c index adc07aee..70b6293c 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -19,6 +19,7 @@ static void test_reserved(void); static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); +static void test_heap_arena(void); int main() { mi_version(); @@ -33,7 +34,8 @@ int main() { // test_reserved(); // negative_stat(); // alloc_huge(); - test_heap_walk(); + // test_heap_walk(); + test_heap_arena(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -212,6 +214,20 @@ static void test_heap_walk(void) { mi_heap_visit_blocks(heap, true, &test_visit, NULL); } +static void test_heap_arena(void) { + mi_arena_id_t arena_id; + int err = mi_reserve_os_memory_ex(100 * 1024 * 1024, false /* commit */, false /* allow large */, true /* exclusive */, &arena_id); + if (err) abort(); + mi_heap_t* heap = mi_heap_new_in_arena(arena_id); + for (int i = 0; i < 500000; i++) { + void* p = mi_heap_malloc(heap, 1024); + if (p == NULL) { + printf("out of memory after %d kb (expecting about 100_000kb)\n", i); + break; + } + } +} + // ---------------------------- // bin size experiments // ------------------------------ From f859190cba4cb0812e443b9dc182a4af7aa205a3 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 3 Nov 2022 17:05:38 -0700 Subject: [PATCH 168/352] update to v2.0.7 --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 8063afe6..f0669c84 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 0) -set(mi_version_patch 6) +set(mi_version_patch 7) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7ca819c2..32eab19e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of 
the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 206 // major + 2 digits minor +#define MI_MALLOC_VERSION 207 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 0e3d543a1391a010cca18d2935557d9ce1495a51 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 3 Nov 2022 17:11:21 -0700 Subject: [PATCH 169/352] Update readme.md --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index fe2ead69..102efc15 100644 --- a/readme.md +++ b/readme.md @@ -77,7 +77,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. -* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind] for leak testing and heap block overflow detection. Initial +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow detection. Initial support for attaching heaps to a speficic memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . * 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation From 18a4b90501cc93ad853b012a1c129d468e8358aa Mon Sep 17 00:00:00 2001 From: Ofek Lev Date: Sat, 5 Nov 2022 16:29:18 -0400 Subject: [PATCH 170/352] Fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 102efc15..58863099 100644 --- a/readme.md +++ b/readme.md @@ -73,7 +73,7 @@ Enjoy! ### Releases -Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to use reduce memory usage +Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. 
From 562efed54d36e436518fbb0d49d67e74f8a33207 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 20:36:51 -0800 Subject: [PATCH 171/352] fix full SEGMENT_SIZE internal alignment by adding one more slice entry --- include/mimalloc-internal.h | 6 +++--- include/mimalloc-types.h | 2 +- src/alloc-aligned.c | 2 +- src/segment.c | 14 +++++++++----- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 40bb1349..243a45a9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -481,11 +481,11 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + mi_assert_internal(p > segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE /* can be equal for large alignment */); - if (diff == MI_SEGMENT_SIZE) diff--; + mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx < segment->slice_entries); + mi_assert_internal(idx <= segment->slice_entries); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b5931789..b960a460 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -381,7 +381,7 @@ typedef struct mi_segment_s { mi_segment_kind_t kind; _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` - mi_slice_t slices[MI_SLICES_PER_SEGMENT]; + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 76ed0ed7..06ed5272 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -63,7 +63,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } - mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + // mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); #if MI_TRACK_ENABLED diff --git a/src/segment.c b/src/segment.c index c743f02f..f637e7a9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -277,7 +277,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } // and the last entry as well (for coalescing) const mi_slice_t* last = slice + slice->slice_count - 1; - if (last > slice && last < mi_segment_slices_end(segment)) { + if (last > slice && last <= mi_segment_slices_end(segment)) { mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); mi_assert_internal(last->slice_count == 0); mi_assert_internal(last->xblock_size == 1); @@ -709,9 +709,13 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i // and also for the last one (if not set already) (the last one is needed for coalescing) // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) - mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; - if (last < mi_segment_slices_end(segment) && last >= slice) { - last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); + size_t slice_last_index = slice_index + slice_count - 1; + if (slice_last_index >= segment->slice_entries) { + slice_last_index = segment->slice_entries; + } + mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_last_index]; + if (last <= mi_segment_slices_end(segment) && last >= slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_last_index - slice_index)); last->slice_count = 0; last->xblock_size = 1; } @@ -853,7 +857,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, siz if (!is_zero) { ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); + memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more } if (!commit_info_still_good) { From 651a99b35d3a70b764524813af4d205333866653 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 20:57:27 -0800 Subject: [PATCH 172/352] refine last slice setting for large alignments --- include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 3 +-- src/segment.c | 29 ++++++++++++++--------------- test/test-api.c | 7 ++++--- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 243a45a9..bb4f50d3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -481,7 +481,7 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - mi_assert_internal(p > segment); + mi_assert_internal(p > (void*)segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 
b960a460..0cef11da 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -174,8 +174,7 @@ typedef int32_t mi_ssize_t; #endif // Maximum slice offset (15) -// #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) -#define MI_MAX_SLICE_OFFSET ((MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE)) +#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) // Used as a special value to encode block sizes in 32 bits. #define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) diff --git a/src/segment.c b/src/segment.c index f637e7a9..0a5ac3c7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -277,7 +277,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { } // and the last entry as well (for coalescing) const mi_slice_t* last = slice + slice->slice_count - 1; - if (last > slice && last <= mi_segment_slices_end(segment)) { + if (last > slice && last < mi_segment_slices_end(segment)) { mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); mi_assert_internal(last->slice_count == 0); mi_assert_internal(last->xblock_size == 1); @@ -679,7 +679,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz // Note: may still return NULL if committing the memory failed static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); - mi_slice_t* slice = &segment->slices[slice_index]; + mi_slice_t* const slice = &segment->slices[slice_index]; mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); // commit before changing the slice data @@ -700,22 +700,21 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i size_t extra = slice_count-1; if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices - slice++; - for (size_t i = 1; i <= extra; i++, slice++) { - slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); - slice->slice_count = 0; - slice->xblock_size = 1; + + mi_slice_t* slice_next = slice + 1; + for (size_t i = 1; i <= extra; i++, slice_next++) { + slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); + slice_next->slice_count = 0; + slice_next->xblock_size = 1; } - // and also for the last one (if not set already) (the last one is needed for coalescing) + // and also for the last one (if not set already) (the last one is needed for coalescing and for large alignments) // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) - size_t slice_last_index = slice_index + slice_count - 1; - if (slice_last_index >= segment->slice_entries) { - slice_last_index = segment->slice_entries; - } - mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_last_index]; - if (last <= mi_segment_slices_end(segment) && last >= slice) { - last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_last_index - slice_index)); + mi_slice_t* last = slice + slice_count - 1; + mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment); + if (last > end) last = end; + if (last > slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t) * (last - slice)); last->slice_count = 0; last->xblock_size = 1; } diff --git a/test/test-api.c b/test/test-api.c index 
312b3f1b..01ef98bd 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -177,10 +177,11 @@ int main(void) { }; CHECK_BODY("malloc-aligned9") { bool ok = true; - for (int i = 0; i < 5 && ok; i++) { + for (int i = 0; i < 8 && ok; i++) { int n = (1 << i); - void* p = mi_malloc_aligned( 2*n*MI_ALIGNMENT_MAX, n*MI_ALIGNMENT_MAX); - ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; + size_t align = n * (MI_ALIGNMENT_MAX / 8); + void* p = mi_malloc_aligned( 2*align, align); + ok = ((uintptr_t)p % align) == 0; mi_free(p); } result = ok; From 1632dd73c9254322f3d65f696195b9b7005ac445 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 6 Nov 2022 21:03:23 -0800 Subject: [PATCH 173/352] remove superfluous asserts --- include/mimalloc-internal.h | 2 +- src/alloc-aligned.c | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bb4f50d3..192e14da 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -479,7 +479,7 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { return start; } -// Get the page containing the pointer +// Get the page containing the pointer (performance critical as it is called in mi_free) static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { mi_assert_internal(p > (void*)segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 06ed5272..66a26b49 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -43,12 +43,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* } oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block - if (p == NULL) return NULL; - const uintptr_t adjustx = alignment - (((uintptr_t)p + offset) & align_mask); - const mi_page_t* page = _mi_ptr_page(p); - const size_t bsize = mi_page_usable_block_size(page); - mi_assert_internal(bsize >= adjustx + size); - mi_assert_internal(true); + if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -63,8 +58,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) { mi_page_set_has_aligned(_mi_ptr_page(p), true); } - // mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); #if MI_TRACK_ENABLED if (p != aligned_p) { From 96f1574faf9739ee6eca5a55df9370767a094247 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 10:51:15 -0800 Subject: [PATCH 174/352] fix huge page aligned allocation size in secure mode --- ide/vs2022/mimalloc.vcxproj | 2 +- src/segment.c | 24 ++++++++++++++++-------- test/test-api.c | 2 +- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 616c226c..9811aa55 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=4;%(PreprocessorDefinitions); + MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/src/segment.c b/src/segment.c index 0a5ac3c7..22b9ccd0 100644 --- a/src/segment.c +++ b/src/segment.c @@ -336,12 +336,14 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, siz size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; - + if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) - guardsize = page_size; - required = _mi_align_up(required, page_size); + guardsize = page_size; + if (required > 0) { + required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size; + } } if (pre_size != NULL) *pre_size = isize; @@ -802,21 +804,27 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, siz size_t memid = 0; size_t align_offset = 0; size_t alignment = MI_SEGMENT_SIZE; - size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; - + if (page_alignment > 0) { mi_assert_internal(huge_page != NULL); mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN); alignment = page_alignment; const size_t info_size = info_slices * MI_SEGMENT_SLICE_SIZE; align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN ); - segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); - segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; + const size_t extra = align_offset - info_size; + // recalculate due to potential guard pages + segment_slices = mi_segment_calculate_slices(required + extra, &pre_size, &info_slices); + //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); + //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; } - else { + const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; + + // get from cache + if (page_alignment == 0) { segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); } + // get from OS if (segment==NULL) { segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, &commit, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate diff --git a/test/test-api.c b/test/test-api.c index 
01ef98bd..e7f3a4ed 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -149,7 +149,7 @@ int main(void) { for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { - ps[i] = mi_malloc_aligned(align*13 /*size*/, align); + ps[i] = mi_malloc_aligned(align*5 /*size*/, align); if (ps[i] == NULL || (uintptr_t)(ps[i]) % align != 0) { ok = false; } From 29405c7d70c931c890f53f3ad80243ba83220768 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 14:53:27 -0800 Subject: [PATCH 175/352] fix initializer --- src/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 38a38913..ca48fb10 100644 --- a/src/init.c +++ b/src/init.c @@ -111,7 +111,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { 0, // cookie 0, // arena id { 0, 0 }, // keys - { {0}, {0}, 0 }, + { {0}, {0}, 0, true }, 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next @@ -152,7 +152,7 @@ mi_heap_t _mi_heap_main = { 0, // initial cookie 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) - { {0x846ca68b}, {0}, 0 }, // random + { {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap From 67439bb4e5b00f1144bf7516c75649fd29d5dd3e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 17:12:14 -0800 Subject: [PATCH 176/352] add NULL check in _mi_segment_of --- src/segment-cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 34c8b029..436ce2bf 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -285,8 +285,9 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) { // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
static mi_segment_t* _mi_segment_of(const void* p) { + if (p == NULL) return NULL; mi_segment_t* segment = _mi_ptr_segment(p); - if (segment == NULL) return NULL; + mi_assert_internal(segment != NULL); size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge From ba8c0f890314d80d830599f686eaa63aafcee880 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Nov 2022 17:21:03 -0800 Subject: [PATCH 177/352] avoid warning for large aligned blocks on linux --- src/alloc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 2b53ac22..f951370a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -488,10 +488,16 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms #if (MI_DEBUG>0) if mi_unlikely(!mi_is_in_heap_region(p)) { - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + #if (MI_INTPTR_SIZE == 8 && defined(__linux__)) + if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640) + #else + { + #endif + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + } } } #endif From b940543cd582514b5f53bc6c317d5fcfd7f28f55 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 17 Nov 2022 18:57:45 -0800 Subject: [PATCH 178/352] experiment with smaller segment size (32MiB) and finer minimal commit (1MiB) --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 016bb684..3a4f8f6b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,7 +132,7 @@ typedef int32_t mi_ssize_t; // Main tuning parameters for segment and page sizes // Sizes for 64-bit (usually divide by two for 32-bit) -#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 #define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB @@ -324,7 +324,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (MI_MiB) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS From 82a765a255b028cf57b9ddcf95a125cdf821da87 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 18 Nov 2022 09:38:01 -0800 Subject: [PATCH 179/352] experiment with 32KiB slices and increased MI_MIN_EXTEND --- include/mimalloc-types.h | 8 ++++---- src/page.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 3a4f8f6b..88905bdc 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,16 +132,16 @@ typedef int32_t mi_ssize_t; // 
Main tuning parameters for segment and page sizes // Sizes for 64-bit (usually divide by two for 32-bit) -#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 32KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB +#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 32MiB #else #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit #endif #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB +#define MI_MEDIUM_PAGE_SHIFT ( 4 + MI_SMALL_PAGE_SHIFT) // 512KiB // Derived constants @@ -324,7 +324,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS diff --git a/src/page.c b/src/page.c index 2fa03606..cb957bf7 100644 --- a/src/page.c +++ b/src/page.c @@ -408,7 +408,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { } // Retire parameters -#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX +#define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) #define MI_RETIRE_CYCLES (8) // Retire a page with no more used blocks @@ -579,7 +579,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co #if (MI_SECURE>0) #define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many #else -#define MI_MIN_EXTEND (1) +#define MI_MIN_EXTEND (4) #endif // Extend the capacity (up to reserved) by initializing a free list From 1a7f6f376d28571432ee4d3a498680da3c9dda89 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 10:22:50 -0800 Subject: [PATCH 180/352] move threadid field --- include/mimalloc-types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index cf142748..d44ecc18 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -374,8 +374,9 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; - _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; From 3ccf849c1a901f3c6fd11ad16c089d60ace4580a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 15:02:41 -0800 Subject: [PATCH 181/352] more refined decommit extend delay --- src/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 3423bd53..25e63904 100644 --- a/src/segment.c +++ b/src/segment.c @@ -540,8 +540,12 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ } else if (segment->decommit_expire <= now) { // previous decommit mask already expired - // mi_segment_delayed_decommit(segment, true, stats); - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in 
case there is a series of free's + if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { + mi_segment_delayed_decommit(segment, true, stats); + } + else { + segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + } } else { // previous decommit mask is not yet expired, increase the expiration by a bit. From c0077471695338fb5a233a42341a052133ef0180 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 15:03:15 -0800 Subject: [PATCH 182/352] back to 64k pages but 32MiB segments and a 1MiB minimal commit size --- include/mimalloc-types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d44ecc18..399001c6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,10 +132,10 @@ typedef int32_t mi_ssize_t; // Main tuning parameters for segment and page sizes // Sizes for 64-bit (usually divide by two for 32-bit) -#define MI_SEGMENT_SLICE_SHIFT (12 + MI_INTPTR_SHIFT) // 32KiB (32KiB on 32-bit) +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 32MiB +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB #else #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit #endif @@ -325,7 +325,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS From 83c027c4bf591b3154a6bab02dc343ff99837387 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 21 Nov 2022 18:56:56 -0800 Subject: [PATCH 183/352] fix medium page size to 512k --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 399001c6..9def491e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -141,7 +141,7 @@ typedef int32_t mi_ssize_t; #endif #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB -#define MI_MEDIUM_PAGE_SHIFT ( 4 + MI_SMALL_PAGE_SHIFT) // 512KiB +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB // Derived constants From 20880807ce275b5ed23eb124d8a4b157eb042dd0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 22 Nov 2022 22:05:18 -0800 Subject: [PATCH 184/352] remove comment --- src/options.c | 2 +- src/segment.c | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/options.c b/src/options.c index 6c6f8f2f..6980a047 100644 --- a/src/options.c +++ b/src/options.c @@ -94,7 +94,7 @@ static mi_option_desc_t options[_mi_option_last] = { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. 
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 2, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees! }; diff --git a/src/segment.c b/src/segment.c index 803bb47b..55ec4615 100644 --- a/src/segment.c +++ b/src/segment.c @@ -316,7 +316,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0); + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 3*MI_MAX_ALIGN_GUARANTEE : 0); if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } @@ -463,15 +463,6 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes. - /* - if (commit && size > 0) { - const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE); - if (p + csize <= mi_segment_end(segment)) { - size = csize; - } - } - */ // commit liberal, but decommit conservative uint8_t* start = NULL; size_t full_size = 0; From 9e56567d23126d0b608487fa4f11d5b59845e50f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 23 Nov 2022 09:50:29 -0800 Subject: [PATCH 185/352] fix decommit for huge objects --- src/alloc.c | 9 ++++++--- src/segment.c | 37 ++++++++++++++++--------------------- test/main-override.cpp | 18 ++++++++++++++++-- test/test-stress.c | 2 +- 4 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index f602fdcf..ac117f17 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -395,9 +395,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc #endif } - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + } #endif // Try to put the block on either the page-local thread free list, or the heap delayed free list. 
@@ -449,7 +450,9 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + if (!mi_page_is_huge(page)) { // huge page content may be already decommitted + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + } #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; diff --git a/src/segment.c b/src/segment.c index 55ec4615..b054b975 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1522,25 +1522,23 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, #if MI_HUGE_PAGE_ABANDON segment->thread_id = 0; // huge segments are immediately abandoned #endif - - if (page_alignment > 0) { - size_t psize; - uint8_t* p = _mi_segment_page_start(segment, page, &psize); - uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)p, page_alignment); - mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); - mi_assert_internal(psize - (aligned_p - p) >= size); - if (!segment->allow_decommit) { - // decommit the part of the page that is unused; this can be quite large (close to MI_SEGMENT_SIZE) - uint8_t* decommit_start = p + sizeof(mi_block_t); // for the free list - ptrdiff_t decommit_size = aligned_p - decommit_start; - mi_segment_decommit(segment, decommit_start, decommit_size, &_mi_stats_main); - } - } + // for huge pages we initialize the xblock_size as we may // overallocate to accommodate large alignments. size_t psize; - _mi_segment_page_start(segment, page, &psize); + uint8_t* start = _mi_segment_page_start(segment, page, &psize); page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? 
MI_HUGE_BLOCK_SIZE : (uint32_t)psize); + + // decommit the part of the prefix of a page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) + if (page_alignment > 0 && segment->allow_decommit) { + uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); + mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(psize - (aligned_p - start) >= size); + uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list + ptrdiff_t decommit_size = aligned_p - decommit_start; + _mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + } + return page; } @@ -1579,13 +1577,10 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); - const size_t csize = mi_page_block_size(page) - sizeof(mi_block_t); - uint8_t* p = ( uint8_t*)block + sizeof(mi_block_t); if (segment->allow_decommit) { - mi_segment_decommit(segment, p, csize, &_mi_stats_main); - } - else { - _mi_os_reset(p, csize, &_mi_stats_main); + const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } } #endif diff --git a/test/main-override.cpp b/test/main-override.cpp index b205dc85..e12567d9 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -37,12 +37,14 @@ static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 static void strdup_test(); // issue #445 static void bench_alloc_large(void); // issue #xxx +static void heap_thread_free_huge(); static void test_stl_allocators(); int main() { mi_stats_reset(); // ignore earlier allocations - + heap_thread_free_huge(); + /* heap_thread_free_large(); heap_no_delete(); heap_late_free(); @@ -51,7 +53,7 @@ int main() { large_alloc(); tsan_numa_test(); strdup_test(); - + */ test_stl_allocators(); test_mt_shutdown(); @@ -240,6 +242,18 @@ static void heap_thread_free_large() { } } +static void heap_thread_free_huge_worker() { + mi_free(shared_p); +} + +static void heap_thread_free_huge() { + for (int i = 0; i < 100; i++) { + shared_p = mi_malloc(1024 * 1024 * 1024); + auto t1 = std::thread(heap_thread_free_large_worker); + t1.join(); + } +} + static void test_mt_shutdown() diff --git a/test/test-stress.c b/test/test-stress.c index 61171d03..b766a5ca 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -91,7 +91,7 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant + if (chance(1, r) && allow_large_objects) items *= 50000; // 0.01% giant else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge else items *= 100; // 1% large objects; } From 58d12723d6817bb9e8141bc67651ea2a76900970 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 23 Nov 2022 10:34:19 -0800 Subject: [PATCH 186/352] make mi_collect(true) actually free the segment caches --- src/segment-cache.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 26786c92..d93fd644 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -133,14 +133,14 @@ static 
mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo #define MI_MAX_PURGE_PER_PUSH (4) -static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) +static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); if (!mi_option_is_enabled(mi_option_allow_decommit)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; - const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); + const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); + size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; @@ -164,13 +164,19 @@ static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } - if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } } } void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - mi_segment_cache_purge(force, tld ); + if (force) { + // called on `mi_collect(true)` but not on thread termination + _mi_segment_cache_free_all(tld); + } + else { + mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); + } } void _mi_segment_cache_free_all(mi_os_tld_t* tld) { @@ -215,7 +221,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // purge expired entries - mi_segment_cache_purge(false /* force? */, tld); + mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); // find an available slot mi_bitmap_index_t bitidx; From 6988bbcca04e0c10db1da642676068dd89895f3d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 27 Nov 2022 12:01:56 -0800 Subject: [PATCH 187/352] fix duplicate definition (issue #652 --- test/main-override.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/test/main-override.cpp b/test/main-override.cpp index f1c9a10b..9704a760 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -254,19 +254,6 @@ static void heap_thread_free_huge() { } } -static void heap_thread_free_huge_worker() { - mi_free(shared_p); -} - -static void heap_thread_free_huge() { - for (int i = 0; i < 10; i++) { - shared_p = mi_malloc(1024 * 1024 * 1024); - auto t1 = std::thread(heap_thread_free_large_worker); - t1.join(); - } -} - - static void test_mt_shutdown() { const int threads = 5; From aea0de4777e4d59092bfaabd226175b29438c9f0 Mon Sep 17 00:00:00 2001 From: Ganesan Rajagopal Date: Sat, 3 Dec 2022 16:27:33 +0530 Subject: [PATCH 188/352] Fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 58863099..d19a4a1c 100644 --- a/readme.md +++ b/readme.md @@ -78,7 +78,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page (see [below](#performance)); please report if you observe any significant performance regression. 
* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow detection. Initial - support for attaching heaps to a speficic memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . + support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . * 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix From 0f796a56a98b224dc645cd2260b95e06139570cf Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 20 Dec 2022 18:59:55 -0800 Subject: [PATCH 189/352] fix bug where eager committed memory would be immediatedy decommitted; possible fix for issue #669 --- src/alloc.c | 4 ++-- src/segment.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 7bea69e9..b8270f1a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -959,7 +959,7 @@ static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { } -mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { void* p = mi_heap_malloc(heap,size); if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; @@ -970,7 +970,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { } -mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { size_t total; if mi_unlikely(mi_count_size_overflow(count, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc diff --git a/src/segment.c b/src/segment.c index 184197ef..5b4dbc7a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -574,7 +574,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) { } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? 
NULL : mi_span_queue_for(slice_count,tld)); @@ -594,7 +594,9 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); + if (allow_decommit) { + mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + } // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); @@ -656,12 +658,12 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } // and add the new free page - mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld); + mi_segment_span_free(segment, mi_slice_index(slice), slice_count, true, tld); return slice; } -static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_segment(slice)==segment); mi_assert_internal(slice->slice_count >= slice_count); mi_assert_internal(slice->xblock_size > 0); // no more in free queue @@ -669,7 +671,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(slice) + slice_count; size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, tld); + mi_segment_span_free(segment, next_index, next_count, allow_decommit, tld); slice->slice_count = (uint32_t)slice_count; } @@ -738,7 +740,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren mi_span_queue_delete(sq, slice); if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, tld); + mi_segment_slice_split(segment, slice, slice_count, false /* don't decommit */, tld); } mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); @@ -872,7 +874,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); if (segment->allow_decommit) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); segment->decommit_mask = decommit_mask; mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); #if MI_DEBUG>2 @@ -919,7 +921,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); } else { mi_assert_internal(huge_page!=NULL); From d1fff1119a52e15c1c3807efe1077024c39fe70e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: 
Wed, 21 Dec 2022 12:19:09 -0800 Subject: [PATCH 190/352] reorganize span free code --- src/segment.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/segment.c b/src/segment.c index 5b4dbc7a..dc98e3e7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -406,7 +406,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { /* ----------------------------------------------------------- - Span management + Commit/Decommit ranges ----------------------------------------------------------- */ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { @@ -569,6 +569,10 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st } +/* ----------------------------------------------------------- + Span free +----------------------------------------------------------- */ + static bool mi_segment_is_abandoned(mi_segment_t* segment) { return (segment->thread_id == 0); } @@ -663,17 +667,10 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } -static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { - mi_assert_internal(_mi_ptr_segment(slice)==segment); - mi_assert_internal(slice->slice_count >= slice_count); - mi_assert_internal(slice->xblock_size > 0); // no more in free queue - if (slice->slice_count <= slice_count) return; - mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - size_t next_index = mi_slice_index(slice) + slice_count; - size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, allow_decommit, tld); - slice->slice_count = (uint32_t)slice_count; -} + +/* ----------------------------------------------------------- + Page allocation +----------------------------------------------------------- */ // Note: may still return NULL if committing the memory failed static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { @@ -725,6 +722,18 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i return page; } +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_ptr_segment(slice) == segment); + mi_assert_internal(slice->slice_count >= slice_count); + mi_assert_internal(slice->xblock_size > 0); // no more in free queue + if (slice->slice_count <= slice_count) return; + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); + size_t next_index = mi_slice_index(slice) + slice_count; + size_t next_count = slice->slice_count - slice_count; + mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + slice->slice_count = (uint32_t)slice_count; +} + static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up @@ -740,7 +749,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren mi_span_queue_delete(sq, slice); if (slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, false /* don't decommit */, tld); + mi_segment_slice_split(segment, slice, slice_count, tld); } 
mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); From 7bb34e056c383e31ee9016ed46af42a53dceced8 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 23 Dec 2022 13:35:50 -0800 Subject: [PATCH 191/352] fix readme --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 364b974b..5f7c8867 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,7 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the run-time systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.9` (2022-12-23). +Latest release tag: `v2.0.9` (2022-12-23). Latest stable tag: `v1.7.9` (2022-12-23). mimalloc is a drop-in replacement for `malloc` and can be used in other programs From 1e4b6b734e06b9f4723826e0db375987a44d5aac Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 31 Jan 2023 16:02:35 -0800 Subject: [PATCH 192/352] fix assertion that was too strict (issue #691) --- src/segment.c | 3 ++- test/main-override.cpp | 45 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/segment.c b/src/segment.c index dc98e3e7..683e413c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -632,7 +632,8 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { - mi_assert_internal(segment->used == 1); // decreased right after this call in `mi_segment_page_clear` + // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case) + mi_assert_internal((segment->used==0 && slice->xblock_size==0) || segment->used == 1); // decreased right after this call in `mi_segment_page_clear` slice->xblock_size = 0; // mark as free anyways // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to // avoid a possible cache miss (and the segment is about to be freed) diff --git a/test/main-override.cpp b/test/main-override.cpp index 7242eb29..40787831 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -37,15 +37,16 @@ static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 static void strdup_test(); // issue #445 static void bench_alloc_large(void); // issue #xxx +static void test_large_migrate(void); // issue #691 static void heap_thread_free_huge(); static void test_stl_allocators(); int main() { - mi_stats_reset(); // ignore earlier allocations - heap_thread_free_huge(); + mi_stats_reset(); // ignore earlier allocations /* + heap_thread_free_huge(); heap_thread_free_large(); heap_no_delete(); heap_late_free(); @@ -55,8 +56,9 @@ int main() { tsan_numa_test(); strdup_test(); */ - test_stl_allocators(); - test_mt_shutdown(); + // test_stl_allocators(); + // test_mt_shutdown(); + test_large_migrate(); //fail_aslr(); bench_alloc_large(); @@ -171,6 +173,41 @@ static void test_stl_allocators() { test_stl_allocator6(); } + +// issue #691 +static char* cptr; + +static void* thread1_allocate() +{ + cptr = mi_calloc_tp(char,22085632); + return NULL; +} + +static void* thread2_free() +{ + assert(cptr); + mi_free(cptr); + cptr = NULL; + return NULL; +} + +static void test_large_migrate(void) { + auto t1 
= std::thread(thread1_allocate); + t1.join(); + auto t2 = std::thread(thread2_free); + t2.join(); + /* + pthread_t thread1, thread2; + + pthread_create(&thread1, NULL, &thread1_allocate, NULL); + pthread_join(thread1, NULL); + + pthread_create(&thread2, NULL, &thread2_free, NULL); + pthread_join(thread2, NULL); + */ + return; +} + // issue 445 static void strdup_test() { #ifdef _MSC_VER From 8be4cee4186120c32def9789c450a185c7213914 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 16 Nov 2022 18:52:40 -0800 Subject: [PATCH 193/352] change max align size to 8 --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f3af528e..a7c4d3c6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -19,7 +19,7 @@ terms of the MIT license. A copy of the license can be found in the file // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `sizeof(void*)` #ifndef MI_MAX_ALIGN_SIZE -#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) +#define MI_MAX_ALIGN_SIZE 8 // sizeof(max_align_t) #endif // ------------------------------------------------------ From 5fe4a3480ffb2aa21bf12b1e92c220ab575a582f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Feb 2023 12:21:06 -0800 Subject: [PATCH 194/352] revert default max align commit back to 16 --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index a7c4d3c6..f3af528e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -19,7 +19,7 @@ terms of the MIT license. A copy of the license can be found in the file // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `sizeof(void*)` #ifndef MI_MAX_ALIGN_SIZE -#define MI_MAX_ALIGN_SIZE 8 // sizeof(max_align_t) +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) #endif // ------------------------------------------------------ From 6f31115c7f5e62a33eb8fe99888591bc6c045173 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 22:11:42 -0800 Subject: [PATCH 195/352] fix segment defined memory for valgrind --- src/segment.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 57ba7068..2698d578 100644 --- a/src/segment.c +++ b/src/segment.c @@ -874,11 +874,18 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi if (segment == NULL) return NULL; // zero the segment info? -- not always needed as it may be zero initialized from the OS + mi_track_mem_defined(segment, offsetof(mi_segment_t, next)); // needed for valgrind mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - if (!is_zero) { + { ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more + size_t zsize = prefix + sizeof(mi_slice_t) * (segment_slices + 1); // one more + if (!is_zero) { + memset((uint8_t*)segment + ofs, 0, zsize); + } + else { + mi_track_mem_defined((uint8_t*)segment + ofs, zsize); // todo: somehow needed for valgrind? 
+ } } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed From 7ec798e19726d4314b90c61c68202457a380b1fa Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 22:54:10 -0800 Subject: [PATCH 196/352] make test-stress match the one in dev --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 8b96a5ae..69650556 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -91,7 +91,7 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 50000; // 0.01% giant + if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge else items *= 100; // 1% large objects; } From a90737a7fa445b3a1afcb899c162cf670bc473fb Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 6 Mar 2023 10:44:43 -0800 Subject: [PATCH 197/352] fix valgrind tracking for zero initialized segments --- src/segment.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/segment.c b/src/segment.c index 2698d578..78171907 100644 --- a/src/segment.c +++ b/src/segment.c @@ -796,8 +796,6 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment const size_t extra = align_offset - info_size; // recalculate due to potential guard pages *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); - //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); - //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; } const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; @@ -831,7 +829,10 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment if (!ok) return NULL; // failed to commit mi_commit_mask_set(pcommit_mask, &commit_needed_mask); } - mi_track_mem_undefined(segment,commit_needed*MI_COMMIT_SIZE); + else if (*is_zero) { + // track zero initialization for valgrind + mi_track_mem_defined(segment, commit_needed * MI_COMMIT_SIZE); + } segment->memid = memid; segment->mem_is_pinned = is_pinned; segment->mem_is_large = mem_large; @@ -874,18 +875,14 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi if (segment == NULL) return NULL; // zero the segment info? -- not always needed as it may be zero initialized from the OS - mi_track_mem_defined(segment, offsetof(mi_segment_t, next)); // needed for valgrind mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan { - ptrdiff_t ofs = offsetof(mi_segment_t, next); + ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - size_t zsize = prefix + sizeof(mi_slice_t) * (segment_slices + 1); // one more + size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more if (!is_zero) { memset((uint8_t*)segment + ofs, 0, zsize); - } - else { - mi_track_mem_defined((uint8_t*)segment + ofs, zsize); // todo: somehow needed for valgrind? 
- } + } } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed From 8fbe7aae50a959cbb5324f675dcfd8c4ff18312d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 19 Mar 2023 19:11:43 -0700 Subject: [PATCH 198/352] update process info primitive api --- src/prim/prim.c | 2 +- src/prim/prim.h | 16 +++++++++++++--- src/prim/unix/prim.c | 36 ++++++++++++++---------------------- src/prim/wasi/prim.c | 12 ++++-------- src/prim/windows/prim.c | 16 ++++++++-------- src/stats.c | 36 +++++++++++++++++++----------------- 6 files changed, 59 insertions(+), 59 deletions(-) diff --git a/src/prim/prim.c b/src/prim/prim.c index eec13c48..109ab8e8 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "windows/prim.c" // VirtualAlloc (Windows) #elif defined(__wasi__) #define MI_USE_SBRK -#include "wasi/prim.h" // memory-grow or sbrk (Wasm) +#include "wasi/prim.c" // memory-grow or sbrk (Wasm) #else #include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) #endif diff --git a/src/prim/prim.h b/src/prim/prim.h index 967c6698..3130d489 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -59,9 +59,18 @@ size_t _mi_prim_numa_node_count(void); mi_msecs_t _mi_prim_clock_now(void); // Return process information (only for statistics) -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, - size_t* current_rss, size_t* peak_rss, - size_t* current_commit, size_t* peak_commit, size_t* page_faults); +typedef struct mi_process_info_s { + mi_msecs_t elapsed; + mi_msecs_t utime; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; +} mi_process_info_t; + +void _mi_prim_process_info(mi_process_info_t* pinfo); // Default stderr output. (only for warnings etc. with verbose enabled) // msg != NULL && _mi_strlen(msg) > 0 @@ -202,6 +211,7 @@ This is inlined here as it is on the fast path for allocation functions. On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a __thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures that the storage will always be available (allocated on the thread stacks). + On some platforms though we cannot use that when overriding `malloc` since the underlying TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. 
We try to circumvent this in an efficient way: diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index d1cd4301..1040c791 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -541,19 +541,15 @@ static mi_msecs_t timeval_secs(const struct timeval* tv) { return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); + pinfo->utime = timeval_secs(&rusage.ru_utime); + pinfo->stime = timeval_secs(&rusage.ru_stime); #if !defined(__HAIKU__) - *page_faults = rusage.ru_majflt; -#endif - // estimate commit using our stats - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate + pinfo->page_faults = rusage.ru_majflt; +#endif #if defined(__HAIKU__) // Haiku does not have (yet?) a way to // get these stats per process @@ -562,19 +558,20 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current ssize_t c; get_thread_info(find_thread(0), &tid); while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; + pinfo->peak_rss += mem.ram_size; } - *page_faults = 0; + pinfo->page_faults = 0; #elif defined(__APPLE__) - *peak_rss = rusage.ru_maxrss; // BSD reports in bytes + pinfo->peak_rss = rusage.ru_maxrss; // BSD reports in bytes struct mach_task_basic_info info; mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { - *current_rss = (size_t)info.resident_size; + pinfo->current_rss = (size_t)info.resident_size; } #else - *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB + pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB #endif + // use defaults for commit } #else @@ -584,15 +581,10 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current #pragma message("define a way to get process info") #endif -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; + // use defaults + MI_UNUSED(pinfo); } #endif diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index b8ac1a1b..89c04d78 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -194,17 +194,13 @@ mi_msecs_t _mi_prim_clock_now(void) { // Process info //---------------------------------------------------------------- -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { - 
*peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; + // use defaults + MI_UNUSED(pinfo); } + //---------------------------------------------------------------- // Output //---------------------------------------------------------------- diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 2fa445a1..1ce44a10 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -428,15 +428,15 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { FILETIME ct; FILETIME ut; FILETIME st; FILETIME et; GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_msecs(&ut); - *stime = filetime_msecs(&st); + pinfo->utime = filetime_msecs(&ut); + pinfo->stime = filetime_msecs(&st); // load psapi on demand if (pGetProcessMemoryInfo == NULL) { @@ -452,11 +452,11 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current if (pGetProcessMemoryInfo != NULL) { pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); } - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; + pinfo->current_rss = (size_t)info.WorkingSetSize; + pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; + pinfo->current_commit = (size_t)info.PagefileUsage; + pinfo->peak_commit = (size_t)info.PeakPagefileUsage; + pinfo->page_faults = (size_t)info.PageFaultCount; } //---------------------------------------------------------------- diff --git a/src/stats.c b/src/stats.c index 357bebce..4bc8835c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -430,21 +430,23 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { - mi_msecs_t elapsed = _mi_clock_end(mi_process_start); - mi_msecs_t utime = 0; - mi_msecs_t stime = 0; - size_t current_rss0 = 0; - size_t peak_rss0 = 0; - size_t current_commit0 = 0; - size_t peak_commit0 = 0; - size_t page_faults0 = 0; - _mi_prim_process_info(&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); - if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); - if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); - if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? 
(size_t)stime : PTRDIFF_MAX)); - if (current_rss!=NULL) *current_rss = current_rss0; - if (peak_rss!=NULL) *peak_rss = peak_rss0; - if (current_commit!=NULL) *current_commit = current_commit0; - if (peak_commit!=NULL) *peak_commit = peak_commit0; - if (page_faults!=NULL) *page_faults = page_faults0; + mi_process_info_t pinfo = { 0 }; + pinfo.elapsed = _mi_clock_end(mi_process_start); + pinfo.utime = 0; + pinfo.stime = 0; + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + pinfo.current_rss = pinfo.current_commit; + pinfo.peak_rss = pinfo.peak_commit; + pinfo.page_faults = 0; + + _mi_prim_process_info(&pinfo); + if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = pinfo.current_rss; + if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss; + if (current_commit!=NULL) *current_commit = pinfo.current_commit; + if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit; + if (page_faults!=NULL) *page_faults = pinfo.page_faults; } From 99c9f55511ea62e80cf7dd28182799a940d4b6bd Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 19 Mar 2023 20:21:20 -0700 Subject: [PATCH 199/352] simplify primitives API --- src/os.c | 27 +++++++++++++------ src/prim/prim.h | 7 ++--- src/prim/unix/prim.c | 60 ++++++++++++++++++++++------------------- src/prim/wasi/prim.c | 19 ++++++++----- src/prim/windows/prim.c | 28 ++++++++++--------- 5 files changed, 83 insertions(+), 58 deletions(-) diff --git a/src/os.c b/src/os.c index 56f96bf2..5263af42 100644 --- a/src/os.c +++ b/src/os.c @@ -135,7 +135,10 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats MI_UNUSED(tld_stats); mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) - _mi_prim_free(addr, size); + int err = _mi_prim_free(addr, size); + if (err != 0) { + _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); + } mi_stats_t* stats = &_mi_stats_main; if (was_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); @@ -163,7 +166,11 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (!commit) allow_large = false; if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning - void* p = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large); + void* p = NULL; + int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p); + if (err != 0) { + _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); + } /* if (commit && allow_large) { p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); @@ -200,7 +207,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not aligned, free it, 
overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { mi_os_mem_free(p, size, commit, stats); - _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; @@ -357,7 +364,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ int err = _mi_prim_commit(start, csize, commit); if (err != 0) { - _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%d), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); @@ -404,7 +411,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) int err = _mi_prim_reset(start, csize); if (err != 0) { - _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, err); + _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } return (err == 0); } @@ -441,7 +448,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { */ int err = _mi_prim_protect(start,csize,protect); if (err != 0) { - _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); } return (err == 0); } @@ -516,13 +523,17 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse for (page = 0; page < pages; page++) { // allocate a page void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); - void* p = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + void* p = NULL; + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p); + if (err != 0) { + _mi_warning_message("unable to allocate huge OS page (error: %d (0x%d), address: %p, size: %zx bytes)", err, err, addr, MI_HUGE_OS_PAGE_SIZE); + } // Did we succeed at a contiguous address? if (p != addr) { // no success, issue a warning and break if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); + _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); } break; diff --git a/src/prim/prim.h b/src/prim/prim.h index 3130d489..1a4fb5d8 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -11,6 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file // note: on all primitive functions, we always get: // addr != NULL and page aligned // size > 0 and page aligned +// return value is an error code an int where 0 is success. 
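// [editor's sketch, not part of this patch] The convention above means every primitive
// returns an `int` error code (0 on success) and reports pointers through an out-parameter,
// so callers in os.c can keep a single warning path. A caller-side illustration using the
// declarations from this header (the snippet itself is illustrative only):
//
//   void* p = NULL;
//   bool  is_large = false;
//   int err = _mi_prim_alloc(size, alignment, true /*commit*/, false /*allow_large*/, &is_large, &p);
//   if (err != 0) {
//     _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx)\n", err, err, size);
//   }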
// OS memory configuration typedef struct mi_os_mem_config_s { @@ -25,13 +26,13 @@ typedef struct mi_os_mem_config_s { void _mi_prim_mem_init( mi_os_mem_config_t* config ); // Free OS memory -void _mi_prim_free(void* addr, size_t size ); +int _mi_prim_free(void* addr, size_t size ); // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. // pre: !commit => !allow_large // try_alignment >= _mi_os_page_size() and a power of 2 -void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large); +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr); // Commit memory. Returns error code or 0 on success. int _mi_prim_commit(void* addr, size_t size, bool commit); @@ -47,7 +48,7 @@ int _mi_prim_protect(void* addr, size_t size, bool protect); // pre: size > 0 and a multiple of 1GiB. // addr is either NULL or an address hint. // numa_node is either negative (don't care), or a numa node number. -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node); +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr); // Return the current NUMA node size_t _mi_prim_numa_node(void); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 1040c791..5a3ca5ab 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -96,11 +96,9 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { // free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { bool err = (munmap(addr, size) == -1); - if (err) { - _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); - } + return (err ? 
errno : 0); } @@ -118,19 +116,24 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { MI_UNUSED(try_alignment); + void* p = NULL; #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); - void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%d), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } if (p!=MAP_FAILED) return p; - // fall back to regular mmap + // fall back to regular mmap } } #elif defined(MAP_ALIGN) // Solaris if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment if (p!=MAP_FAILED) return p; // fall back to regular mmap } @@ -140,14 +143,18 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { - void* p = mmap(hint, size, protect_flags, flags, fd, 0); + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%d), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } if (p!=MAP_FAILED) return p; - // fall back to regular mmap + // fall back to regular mmap } } #endif // regular mmap - void* p = mmap(addr, size, protect_flags, flags, fd, 0); + p = mmap(addr, size, protect_flags, flags, fd, 0); if (p!=MAP_FAILED) return p; // failed to allocate return NULL; @@ -217,7 +224,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); } @@ -258,20 +265,18 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec #endif } } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); - } return p; } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
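// [editor's sketch, not part of this patch] Since `try_alignment` is only a hint, the caller
// in os.c (mi_os_mem_alloc_aligned) falls back to over-allocation when the returned pointer
// is not aligned: allocate `size + alignment`, round up, and unmap the unused head and tail
// where the platform supports partial unmapping. A minimal sketch, with `raw_alloc`/`raw_free`
// as hypothetical stand-ins for the primitive calls:
//
//   uint8_t* p = raw_alloc(size + alignment);
//   uint8_t* aligned = (uint8_t*)_mi_align_up((uintptr_t)p, alignment);
//   raw_free(p, (size_t)(aligned - p));                              // trim head (may be 0)
//   raw_free(aligned + size, alignment - (size_t)(aligned - p));     // trim tail
//   return aligned;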
-void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - return unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : errno); } @@ -379,28 +384,29 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co } #endif -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { bool is_large = true; - void* p = unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); - if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes unsigned long numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: - long err = mi_prim_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); - } + err = errno; + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%d))\n", numa_node, err, err); + } } - return p; + return (*addr != NULL ? 0 : errno); } #else -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOMEM; } #endif diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index 89c04d78..f995304f 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -27,9 +27,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { // Free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { MI_UNUSED(addr); MI_UNUSED(size); // wasi heap cannot be shrunk + return 0; } @@ -101,20 +102,23 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { } } } + /* if (p == NULL) { _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); errno = ENOMEM; return NULL; } - mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + */ + mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); return p; } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
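// [editor's sketch, not part of this patch] With sbrk/memory.grow there is no way to request
// alignment, so one way mi_prim_mem_grow can satisfy the alignment hint is to grow past the
// next aligned boundary. Roughly (current_break/grow_by are hypothetical helpers):
//
//   uintptr_t base    = current_break();
//   uintptr_t aligned = _mi_align_up(base, try_alignment);
//   if (!grow_by((aligned - base) + size)) return NULL;
//   return (void*)aligned;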
-void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { MI_UNUSED(allow_large); MI_UNUSED(commit); *is_large = false; - return mi_prim_mem_grow(size, try_alignment); + *addr = mi_prim_mem_grow(size, try_alignment); + return (*addr != NULL ? 0 : ENOMEM); } @@ -142,9 +146,10 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) { // Huge pages and NUMA nodes //--------------------------------------------- -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOSYS; } size_t _mi_prim_numa_node(void) { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 1ce44a10..1e15273a 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -156,7 +156,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) // Free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { DWORD errcode = 0; bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); if (err) { errcode = GetLastError(); } @@ -172,9 +172,7 @@ void _mi_prim_free(void* addr, size_t size ) { if (err) { errcode = GetLastError(); } } } - if (errcode != 0) { - _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); - } + return (int)errcode; } @@ -240,19 +238,18 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = win_virtual_alloc_prim(addr, size, try_alignment, flags); } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); - } + //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } return p; } -void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); int flags = MEM_RESERVE; if (commit) { flags |= MEM_COMMIT; } - return win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + return (*addr != NULL ? 
0 : (int)GetLastError()); } @@ -296,7 +293,7 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) { // Huge page allocation //--------------------------------------------- -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) +static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node) { const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; @@ -315,7 +312,7 @@ void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) params[1].Arg.ULong = (unsigned)numa_node; } SIZE_T psize = size; - void* base = addr; + void* base = hint_addr; NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); if (err == 0 && base != NULL) { return base; @@ -330,11 +327,16 @@ void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type.Type = MiMemExtendedParameterNumaNode; params[0].Arg.ULong = (unsigned)numa_node; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1); } // otherwise use regular virtual alloc on older windows - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); + return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); +} + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); + return (*addr != NULL ? 0 : (int)GetLastError()); } From 993c0a49b4196c807da226b6796a262a062ff1eb Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:06:28 -0700 Subject: [PATCH 200/352] fix includes --- src/segment-cache.c | 6 +++--- src/static.c | 7 ------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index d93fd644..4a16a18a 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -11,10 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file The full memory map of all segments is also implemented here. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" -#include "bitmap.h" // atomic bitmap +#include "./bitmap.h" // atomic bitmap //#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache diff --git a/src/static.c b/src/static.c index a71cddca..d992f4da 100644 --- a/src/static.c +++ b/src/static.c @@ -29,15 +29,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "init.c" #include "options.c" #include "os.c" -<<<<<<< HEAD -#include "page.c" -#include "prim/prim.c" -#include "random.c" -======= #include "page.c" // includes page-queue.c #include "random.c" -#include "region.c" ->>>>>>> dev-platform #include "segment.c" #include "segment-cache.c" #include "stats.c" From 90f866c5bcc77496bf19df7bc85ab8a42b6b2490 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:45:34 -0700 Subject: [PATCH 201/352] fix warnings for issues #709 --- src/segment.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/segment.c b/src/segment.c index 648116c3..1b73b19a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -959,7 +959,9 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // Remove the free pages mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); + #if MI_DEBUG>1 size_t page_count = 0; + #endif while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); @@ -967,7 +969,9 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t if (slice->xblock_size == 0 && segment->kind != MI_SEGMENT_HUGE) { mi_segment_span_remove_from_queue(slice, tld); } + #if MI_DEBUG>1 page_count++; + #endif slice = slice + slice->slice_count; } mi_assert_internal(page_count == 2); // first page is allocated by the segment itself From 4bf63300b3935245559833887277e4aa06814940 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 12:29:36 -0700 Subject: [PATCH 202/352] fix alignment issue #700 --- ide/vs2022/mimalloc.vcxproj | 2 +- src/segment.c | 4 +++- test/main-override-static.c | 9 +++++++++ test/test-api.c | 18 ++++++++++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 07a854ab..894c5030 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true Default ../../include - MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); + MI_DEBUG=0;MI_SECURE=0;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/src/segment.c b/src/segment.c index 1b73b19a..1e23bb1a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -316,7 +316,9 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 3*MI_MAX_ALIGN_GUARANTEE : 0); + // note: the offset must always be an xblock_size multiple since we assume small allocations + // are aligned (see `mi_heap_malloc_aligned`). + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 512 ? 
xblock_size : 0); if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } diff --git a/test/main-override-static.c b/test/main-override-static.c index 534c8849..5e8b7333 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -20,6 +20,7 @@ static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); static void test_heap_arena(void); +static void test_align(void); int main() { mi_version(); @@ -37,6 +38,7 @@ int main() { // alloc_huge(); // test_heap_walk(); // test_heap_arena(); + test_align(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -68,6 +70,13 @@ int main() { return 0; } +static void test_align() { + void* p = mi_malloc_aligned(256, 256); + if (((uintptr_t)p % 256) != 0) { + fprintf(stderr, "%p is not 256 alignend!\n", p); + } +} + static void invalid_free() { free((void*)0xBADBEEF); realloc((void*)0xBADBEEF,10); diff --git a/test/test-api.c b/test/test-api.c index c78e1972..1967dad7 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -212,6 +212,24 @@ int main(void) { result = mi_heap_contains_block(heap, p); mi_heap_destroy(heap); } + CHECK_BODY("malloc-aligned12") { + bool ok = true; + const size_t align = 256; + for (int j = 1; j < 1000; j++) { + void* ps[1000]; + for (int i = 0; i < 1000 && ok; i++) { + ps[i] = mi_malloc_aligned(j // size + , align); + if (ps[i] == NULL || ((uintptr_t)(ps[i]) % align) != 0) { + ok = false; + } + } + for (int i = 0; i < 1000 && ok; i++) { + mi_free(ps[i]); + } + } + result = ok; + }; CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; From c935521bf92acfa539ea5f896e0f6d12611c9d5d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 12:32:41 -0700 Subject: [PATCH 203/352] fix test and project --- ide/vs2022/mimalloc.vcxproj | 2 +- test/main-override-static.c | 2 +- test/test-api.c | 18 ------------------ 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 894c5030..07a854ab 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true Default ../../include - MI_DEBUG=0;MI_SECURE=0;%(PreprocessorDefinitions); + MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/test/main-override-static.c b/test/main-override-static.c index 5e8b7333..e71be29e 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -38,7 +38,7 @@ int main() { // alloc_huge(); // test_heap_walk(); // test_heap_arena(); - test_align(); + // test_align(); void* p1 = malloc(78); void* p2 = malloc(24); diff --git a/test/test-api.c b/test/test-api.c index 1967dad7..c78e1972 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -212,24 +212,6 @@ int main(void) { result = mi_heap_contains_block(heap, p); mi_heap_destroy(heap); } - CHECK_BODY("malloc-aligned12") { - bool ok = true; - const size_t align = 256; - for (int j = 1; j < 1000; j++) { - void* ps[1000]; - for (int i = 0; i < 1000 && ok; i++) { - ps[i] = mi_malloc_aligned(j // size - , align); - if (ps[i] == NULL || ((uintptr_t)(ps[i]) % align) != 0) { - ok = false; - } - } - for (int i = 0; i < 1000 && ok; i++) { - mi_free(ps[i]); - } - } - result = ok; - }; CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; From 
a582d760ed8266af9fab445bf3e06e65d073a6f3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 12:39:15 -0700 Subject: [PATCH 204/352] refine start offset in a page --- src/segment.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 1e23bb1a..451ef250 100644 --- a/src/segment.c +++ b/src/segment.c @@ -318,7 +318,11 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c // make the start not OS page aligned for smaller blocks to avoid page/cache effects // note: the offset must always be an xblock_size multiple since we assume small allocations // are aligned (see `mi_heap_malloc_aligned`). - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 512 ? xblock_size : 0); + size_t start_offset = 0; + if (xblock_size >= MI_INTPTR_SIZE) { + if (xblock_size <= 64) { start_offset = 3*xblock_size; } + else if (xblock_size <= 512) { start_offset = xblock_size; } + } if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } From 1cbc55f2b8baccf8225024923cc840a5cc0773e7 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 23 Mar 2023 13:05:10 -0700 Subject: [PATCH 205/352] fix initialization of decommit mask for huge pages --- src/segment.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/segment.c b/src/segment.c index 451ef250..c9525490 100644 --- a/src/segment.c +++ b/src/segment.c @@ -509,6 +509,7 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); return mi_segment_commitx(segment,true,p,size,stats); } @@ -904,6 +905,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); #endif } + else { + segment->decommit_expire = 0; + mi_commit_mask_create_empty( &segment->decommit_mask ); + } // initialize segment info const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? 
MI_SLICES_PER_SEGMENT : segment_slices); From 165b84705132bac86dc680620bd1c35f639809bc Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 23 Mar 2023 16:11:38 -0700 Subject: [PATCH 206/352] improve segment_cache assertions --- include/mimalloc/internal.h | 2 +- src/os.c | 2 +- src/page.c | 2 + src/prim/unix/prim.c | 2 +- src/segment-cache.c | 77 ++++++++++++++++++++----------------- src/segment.c | 2 +- 6 files changed, 48 insertions(+), 39 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 8c9e98a1..710b9e6f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -115,7 +115,7 @@ mi_arena_id_t _mi_arena_id_none(void); bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_cache_free_all(mi_os_tld_t* tld); diff --git a/src/os.c b/src/os.c index d6c94b11..5ac37c2e 100644 --- a/src/os.c +++ b/src/os.c @@ -364,7 +364,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ int err = _mi_prim_commit(start, csize, commit); if (err != 0) { - _mi_warning_message("cannot %s OS memory (error: %d (0x%d), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); diff --git a/src/page.c b/src/page.c index f650af31..fd1af187 100644 --- a/src/page.c +++ b/src/page.c @@ -92,8 +92,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif + #if !MI_TSAN mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); + #endif //size_t tfree_count = mi_page_list_count(page, tfree); //mi_assert_internal(tfree_count <= page->thread_freed + 1); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 0ca9bc64..e51fb6bd 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -395,7 +395,7 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, vo long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { err = errno; - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%d))\n", numa_node, err, err); + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); } } return (*addr != NULL ? 
0 : errno); diff --git a/src/segment-cache.c b/src/segment-cache.c index 4a16a18a..6f9d5fcb 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -35,8 +35,8 @@ typedef struct mi_cache_slot_s { static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 -static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! -static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; +static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! +static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { @@ -48,7 +48,8 @@ static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void mi_decl_noinline static void* mi_segment_cache_pop_ex( bool all_suitable, size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, + mi_commit_mask_t* decommit_mask, bool large_allowed, + bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE @@ -66,23 +67,28 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // find an available slot + // find an available slot and make it unavailable mi_bitmap_index_t bitidx = 0; bool claimed = false; mi_arena_id_t req_arena_id = _req_arena_id; mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - if (*large) { // large allowed? - claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); + if (large_allowed) { // large allowed? 
+ claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = true; } if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = false; } if (!claimed) return NULL; + // no longer available but still in-use + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + // found a slot mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; void* p = slot->p; @@ -95,16 +101,15 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); // mark the slot as free again - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); return p; #endif } -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large, is_pinned, is_zero, _req_arena_id, memid, tld); + return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) @@ -113,10 +118,11 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo // nothing } else if (mi_commit_mask_is_full(cmask)) { + // decommit the whole in one call _mi_os_decommit(p, total, stats); } else { - // todo: one call to decommit the whole at once? 
+ // decommit parts mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t part = total/MI_COMMIT_MASK_BITS; size_t idx; @@ -148,21 +154,25 @@ static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, if (expire != 0 && (force || now >= expire)) { // racy read // seems expired, first claim it from available purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways + // it was available, we claimed it (and made it unavailable) + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); + // we can now access it safely expire = mi_atomic_loadi64_acquire(&slot->expire); if (expire != 0 && (force || now >= expire)) { // safe read + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask)); _mi_abandoned_await_readers(); // wait until safe to decommit // decommit committed parts // TODO: instead of decommit, we could also free to the OS? mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); mi_commit_mask_create_empty(&slot->decommit_mask); } - _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop + _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } @@ -184,23 +194,20 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { mi_commit_mask_t decommit_mask; bool is_pinned; bool is_zero; + bool is_large; size_t memid; const size_t size = MI_SEGMENT_SIZE; - // iterate twice: first large pages, then regular memory - for (int i = 0; i < 2; i++) { - void* p; - do { - // keep popping and freeing the memory - bool large = (i == 0); - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, - &large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); - } + void* p; + do { + // keep popping and freeing the memory + p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, + true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); + if (p != NULL) { + size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); + if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); + } + } while (p != 
NULL); } mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) @@ -228,8 +235,8 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); if (!claimed) return false; - mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); #if MI_DEBUG>1 if (is_pinned || is_large) { mi_assert_internal(mi_commit_mask_is_full(commit_mask)); @@ -257,7 +264,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // make it available - _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx); return true; #endif } @@ -273,7 +280,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me #if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB +#define MI_MAX_ADDRESS ((size_t)40 << 40) // 20TB #else #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb #endif diff --git a/src/segment.c b/src/segment.c index c9525490..f8a5d6a0 100644 --- a/src/segment.c +++ b/src/segment.c @@ -809,7 +809,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment // get from cache? 
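// [editor's note, not part of this patch] A hedged summary of the allocation order in
// mi_segment_os_alloc: regular segments (page_alignment == 0) first try to pop a
// MI_SEGMENT_SIZE block from the segment cache, reusing its commit/decommit masks; only if
// that fails (or for aligned/huge requests) does the code fall through to the arena/OS
// allocation below, after which the info slices holding the segment metadata are committed.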
if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } // get from OS From 90600188a8624bc30e807b62dc643c0dc7e3d6e7 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 28 Mar 2023 09:58:31 -0700 Subject: [PATCH 207/352] remove superfluous prototypes --- src/arena.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/arena.c b/src/arena.c index 674df73f..18e3f2ac 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,17 +28,6 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo #include "bitmap.h" // atomic bitmap -// os.c -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); - -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); - -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); - - /* ----------------------------------------------------------- Arena allocation ----------------------------------------------------------- */ From 6dd3073a752e479a53b1b7760193103d0e83e5d6 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 28 Mar 2023 10:16:19 -0700 Subject: [PATCH 208/352] avoid caching segments in pinned arenas; happes with huge OS page reservations --- src/segment-cache.c | 17 ++++++++++++----- src/segment.c | 6 ++++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index 6f9d5fcb..eeae1b50 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -216,20 +216,27 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me return false; #else - // only for normal segment blocks + // purge expired entries + mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); + + // only cache normal segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + // Also do not cache arena allocated segments that cannot be decommitted. (as arena allocation is fast) + // This is a common case with reserved huge OS pages. 
+ // + // (note: we could also allow segments that are already fully decommitted but that never happens + // as the first slice is always committed (for the segment metadata)) + if (!_mi_arena_is_os_allocated(memid) && is_pinned) return false; + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); size_t start_field = 0; if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node; if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // purge expired entries - mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - // find an available slot mi_bitmap_index_t bitidx; bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); diff --git a/src/segment.c b/src/segment.c index f8a5d6a0..dc25dbda 100644 --- a/src/segment.c +++ b/src/segment.c @@ -397,8 +397,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); + if (!segment->mem_is_pinned) { + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + if (csize > 0) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + } _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); } From c344bf5c20b7357fa84ad62d5bac7c0b8c7706fc Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 31 Mar 2023 21:18:50 -0700 Subject: [PATCH 209/352] wip: work on purgable arenas --- src/segment-cache.c | 2 +- src/segment.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/segment-cache.c b/src/segment-cache.c index eeae1b50..58b98df3 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -16,7 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "./bitmap.h" // atomic bitmap -//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache +// #define MI_CACHE_DISABLE 1 // define to completely disable the segment cache #define MI_CACHE_FIELDS (16) #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit diff --git a/src/segment.c b/src/segment.c index 3e56d50f..8825ce52 100644 --- a/src/segment.c +++ b/src/segment.c @@ -11,7 +11,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // memset #include -#define MI_PAGE_HUGE_ALIGN (256*1024) +#define MI_USE_SEGMENT_CACHE 0 +#define MI_PAGE_HUGE_ALIGN (256*1024) static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); @@ -394,8 +395,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache - !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) +#if MI_USE_SEGMENT_CACHE + if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache + || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) +#endif { if (!segment->mem_is_pinned) { const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); @@ -809,10 +812,14 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; + #if MI_USE_SEGMENT_CACHE // get from cache? if (page_alignment == 0) { segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } + #else + MI_UNUSED(pdecommit_mask); + #endif // get from OS if (segment==NULL) { From 94a867869e98e5113ffda4bc91c2668969bb38f1 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 13:53:43 -0700 Subject: [PATCH 210/352] wip: purgeable arenas; fix asan warnings --- src/arena.c | 18 +++++++++++++----- src/os.c | 3 +++ src/prim/unix/prim.c | 3 ++- src/segment.c | 12 +++++++++--- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/arena.c b/src/arena.c index c4665a8f..c99267c8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -133,7 +133,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* { size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; @@ -189,6 +189,8 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren // no need to commit, but check if already fully committed *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } + + mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); return p; } @@ -300,7 +302,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; } - } + } } // finally, fall 
back to the OS @@ -356,10 +358,11 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, const size_t size = blocks * MI_ARENA_BLOCK_SIZE; void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); const bool decommitted = mi_os_purge(p, size, stats); + // clear the purged blocks + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap if (decommitted) { _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); - _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); } } @@ -520,14 +523,19 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - + // potentially decommit if (!arena->allow_decommit || arena->blocks_committed == NULL) { - mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) + mi_assert_internal(all_committed); } else { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); + if (!all_committed) { + // assume the entire range as no longer committed + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + } + // (delay) purge the entire range mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); } diff --git a/src/os.c b/src/os.c index 6145ccb3..8bcdbf7b 100644 --- a/src/os.c +++ b/src/os.c @@ -411,6 +411,9 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) if (err != 0) { _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } + else { + mi_track_mem_undefined(start, csize); + } return (err == 0); } diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 011ffa7c..09c76f90 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -380,7 +380,8 @@ int _mi_prim_commit(void* start, size_t size, bool commit) { } int _mi_prim_reset(void* start, size_t size) { - #if defined(MADV_FREE) + // note: disable the use of MADV_FREE since it leads to confusing stats :-( + #if 0 // defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; diff --git a/src/segment.c b/src/segment.c index 8825ce52..e4381e74 100644 --- a/src/segment.c +++ b/src/segment.c @@ -400,12 +400,18 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) #endif { - if (!segment->mem_is_pinned) { + // if not all committed, an arena may decommit the whole area, but that double counts + // the already decommitted parts; adjust for that in the stats. 
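// [Editorial worked example, not part of the patch; numbers are illustrative only]
// If a segment of `size` bytes has only `csize` bytes actually committed, the arena free
// below accounts a decommit of the full `size`, so the committed statistic is pre-increased
// here by `size - csize` to keep the net change at `-csize`. For instance, with size = 32 MiB
// and csize = 8 MiB: +24 MiB here, -32 MiB in the arena free, net -8 MiB, which matches what
// was really committed.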
+ if (!mi_commit_mask_is_full(&segment->commit_mask)) { const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + mi_assert_internal(size > csize); + if (size > csize) { + _mi_stat_increase(&_mi_stats_main.committed, size - csize); + } } _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); + _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, + mi_commit_mask_is_full(&segment->commit_mask) /* all committed? */, tld->stats); } } From f5ab38f87b692371a5aba6ce7cb942ac20022321 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 15:06:09 -0700 Subject: [PATCH 211/352] wip: use purge throughout for segments and arenas; more aggressive delays --- doc/mimalloc-doc.h | 8 +- include/mimalloc.h | 14 ++-- include/mimalloc/internal.h | 5 +- include/mimalloc/types.h | 9 ++- src/arena.c | 23 ++---- src/options.c | 16 ++-- src/os.c | 15 ++++ src/region.c | 8 +- src/segment-cache.c | 28 +++---- src/segment.c | 142 +++++++++++++++++++----------------- 10 files changed, 141 insertions(+), 127 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 815901f2..3e75243b 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -821,12 +821,12 @@ typedef enum mi_option_e { mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows) mi_option_segment_reset, ///< Experimental mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default) - mi_option_reset_decommits, ///< Experimental + mi_option_purge_decommits, ///< Experimental // v2.x specific options - mi_option_allow_decommit, ///< Enable decommitting memory (=on) - mi_option_decommit_delay, ///< Decommit page memory after N milli-seconds delay (25ms). - mi_option_segment_decommit_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms). + mi_option_allow_purge, ///< Enable decommitting memory (=on) + mi_option_purge_delay, ///< Decommit page memory after N milli-seconds delay (25ms). + mi_option_segment_purge_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms).
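// [Editorial example, not part of the patch; values are illustrative] With the renamed
// options, purging is tuned through the regular option API or environment variables, e.g.:
//
//   mi_option_set(mi_option_purge_delay, 25);             // purge page memory after ~25ms
//   mi_option_set_enabled(mi_option_allow_purge, false);  // or turn purging off entirely
//
// (equivalently MIMALLOC_PURGE_DELAY=25 or MIMALLOC_ALLOW_PURGE=0); the MI_OPTION_LEGACY
// entries later in this series keep the old decommit-based names working.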
_mi_option_last } mi_option_t; diff --git a/include/mimalloc.h b/include/mimalloc.h index 23ac05b7..6ade2e96 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -324,27 +324,27 @@ typedef enum mi_option_e { // some of the following options are experimental // (deprecated options are kept for binary backward compatibility with v1.x versions) mi_option_eager_commit, - mi_option_deprecated_eager_region_commit, - mi_option_reset_decommits, + mi_option_eager_arena_commit, + mi_option_purge_decommits, mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup mi_option_deprecated_segment_cache, mi_option_page_reset, - mi_option_abandoned_page_decommit, + mi_option_abandoned_page_purge, mi_option_deprecated_segment_reset, mi_option_eager_commit_delay, - mi_option_decommit_delay, + mi_option_purge_delay, mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) mi_option_os_tag, mi_option_max_errors, mi_option_max_warnings, mi_option_max_segment_reclaim, - mi_option_allow_decommit, - mi_option_segment_decommit_delay, - mi_option_decommit_extend_delay, + mi_option_allow_purge, + mi_option_deprecated_segment_decommit_delay, + mi_option_purge_extend_delay, mi_option_destroy_on_exit, mi_option_arena_reserve, mi_option_arena_purge_delay, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2108a909..e97e7d91 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -93,6 +93,7 @@ size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); @@ -120,8 +121,8 @@ bool _mi_arena_is_os_allocated(size_t arena_memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); // "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); void _mi_segment_cache_free_all(mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index c7ddaaae..38b13883 100644 --- a/include/mimalloc/types.h +++ 
b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // 1MiB #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS @@ -379,9 +379,10 @@ typedef struct mi_segment_s { size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) - bool allow_decommit; - mi_msecs_t decommit_expire; - mi_commit_mask_t decommit_mask; + bool allow_decommit; + bool allow_purge; + mi_msecs_t purge_expire; + mi_commit_mask_t purge_mask; mi_commit_mask_t commit_mask; _Atomic(struct mi_segment_s*) abandoned_next; diff --git a/src/arena.c b/src/arena.c index c99267c8..1f0dd2f8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -297,7 +297,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? { mi_arena_id_t arena_id = 0; - const bool arena_commit = _mi_os_has_overcommit(); + const bool arena_commit = _mi_os_has_overcommit() || mi_option_is_enabled(mi_option_eager_arena_commit); if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; @@ -336,20 +336,6 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { Arena purge ----------------------------------------------------------- */ -// either resets or decommits memory, returns true if the memory was decommitted. -static bool mi_os_purge(void* p, size_t size, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits) && // should decommit? - !_mi_preloading()) // don't decommit during preloading (unsafe) - { - _mi_os_decommit(p, size, stats); - return true; // decommitted - } - else { - _mi_os_reset(p, size, stats); - return false; // not decommitted - } -} - // reset or decommit in an arena and update the committed/decommit bitmaps static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_committed != NULL); @@ -357,7 +343,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->allow_decommit); const size_t size = blocks * MI_ARENA_BLOCK_SIZE; void* const p = arena->start + (mi_bitmap_index_bit(bitmap_idx) * MI_ARENA_BLOCK_SIZE); - const bool decommitted = mi_os_purge(p, size, stats); + const bool decommitted = _mi_os_purge(p, size, stats); // clear the purged blocks _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap @@ -369,6 +355,8 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, // Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. 
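// [Editorial sketch, not part of the patch] The scheduling policy introduced by this commit,
// restated in outline (helper names as in the surrounding diff; the body is abridged):
//
//   if (!mi_option_is_enabled(mi_option_allow_purge)) return;           // purging disabled
//   if (_mi_preloading() || mi_option_get(mi_option_arena_purge_delay) == 0) {
//     mi_arena_purge(arena, bitmap_idx, blocks, stats);                 // purge immediately
//   }
//   else {
//     // mark the blocks in arena->blocks_purge and let a later mi_arenas_try_purge()
//     // pass purge them once arena_purge_delay milliseconds have elapsed
//   }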
static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_purge != NULL); + if (!mi_option_is_enabled(mi_option_allow_purge)) return; + const long delay = mi_option_get(mi_option_arena_purge_delay); if (_mi_preloading() || delay == 0) { // decommit directly @@ -468,7 +456,8 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { const long delay = mi_option_get(mi_option_arena_purge_delay); - if (_mi_preloading() || delay == 0 /* || !mi_option_is_enabled(mi_option_allow_decommit) */) return; // nothing will be scheduled + if (_mi_preloading() || delay == 0 || !mi_option_is_enabled(mi_option_allow_purge)) return; // nothing will be scheduled + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); if (max_arena == 0) return; diff --git a/src/options.c b/src/options.c index 239ec308..44eee4b6 100644 --- a/src/options.c +++ b/src/options.c @@ -60,15 +60,15 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, + { 0, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates + { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge, abandoned_page_decommit) },// decommit free page memory when a thread terminates { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed @@ -77,23 +77,23 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds + { 10, UNINIT, MI_OPTION_LEGACY(purge_delay, decommit_delay) }, // page decommit delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. 
number of segment reclaims from the abandoned segments per try. - { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, + { 1, UNINIT, MI_OPTION_LEGACY(allow_purge, allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) + { 100, UNINIT, MI_OPTION(deprecated_segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments + { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! #if (MI_INTPTR_SIZE>4) { 1024L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time #else { 128L*1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 500, UNINIT, MI_OPTION(arena_purge_delay) } // reset/decommit delay in milli-seconds for arena allocation + { 100, UNINIT, MI_OPTION(arena_purge_delay) } // reset/decommit delay in milli-seconds for arena allocation }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 8bcdbf7b..20c6f8eb 100644 --- a/src/os.c +++ b/src/os.c @@ -436,6 +436,21 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat } */ +// either resets or decommits memory, returns true if the memory was decommitted. +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + _mi_os_decommit(p, size, stats); + return true; // decommitted + } + else { + _mi_os_reset(p, size, stats); + return false; // not decommitted + } +} + + // Protect a region in memory to be not accessible. static bool mi_os_protectx(void* addr, size_t size, bool protect) { // page align conservatively within the range diff --git a/src/region.c b/src/region.c index 809b9ec8..b01d4091 100644 --- a/src/region.c +++ b/src/region.c @@ -307,7 +307,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* mi_assert_internal(!info.x.is_large && !info.x.is_pinned); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); _mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed + if (*commit || !mi_option_is_enabled(mi_option_purge_decommits)) { // only if needed bool reset_zero = false; _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); if (reset_zero) *is_zero = true; @@ -415,7 +415,7 @@ void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, s // reset the blocks to reduce the working set. 
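// [Editorial note, not part of the patch] "Purge" is the policy switch implemented by the new
// _mi_os_purge shown above: with mi_option_purge_decommits enabled the range is decommitted and
// the call returns true, otherwise it is only reset (madvise-style, staying committed) and the
// call returns false. A typical caller therefore looks roughly like:
//
//   const bool decommitted = _mi_os_purge(p, size, stats);
//   if (decommitted) { /* also clear the committed bitmap / stats for this range */ }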
if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead + mi_option_is_enabled(mi_option_purge_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); @@ -467,7 +467,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { -----------------------------------------------------------------------------*/ bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { + if (mi_option_is_enabled(mi_option_purge_decommits)) { return _mi_os_decommit(p, size, tld->stats); } else { @@ -476,7 +476,7 @@ bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { + if (mi_option_is_enabled(mi_option_purge_decommits)) { return _mi_os_commit(p, size, is_zero, tld->stats); } else { diff --git a/src/segment-cache.c b/src/segment-cache.c index 58b98df3..a98e6b07 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -29,7 +29,7 @@ typedef struct mi_cache_slot_s { size_t memid; bool is_pinned; mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; _Atomic(mi_msecs_t) expire; } mi_cache_slot_t; @@ -48,7 +48,7 @@ static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void mi_decl_noinline static void* mi_segment_cache_pop_ex( bool all_suitable, size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool large_allowed, + mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { @@ -96,7 +96,7 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( *is_pinned = slot->is_pinned; *is_zero = false; *commit_mask = slot->commit_mask; - *decommit_mask = slot->decommit_mask; + *purge_mask = slot->purge_mask; slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); @@ -107,9 +107,9 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( } -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); + return mi_segment_cache_pop_ex(false, size, commit_mask, purge_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) @@ -142,7 +142,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_decommit)) 
return; + if (!mi_option_is_enabled(mi_option_allow_purge)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); @@ -170,7 +170,7 @@ static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, // decommit committed parts // TODO: instead of decommit, we could also free to the OS? mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); + mi_commit_mask_create_empty(&slot->purge_mask); } _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } @@ -191,7 +191,7 @@ void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { void _mi_segment_cache_free_all(mi_os_tld_t* tld) { mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; bool is_pinned; bool is_zero; bool is_large; @@ -200,7 +200,7 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { void* p; do { // keep popping and freeing the memory - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, + p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &purge_mask, true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); if (p != NULL) { size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); @@ -210,7 +210,7 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { } while (p != NULL); } -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return false; @@ -257,13 +257,13 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->commit_mask = *commit_mask; - slot->decommit_mask = *decommit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { - long delay = mi_option_get(mi_option_segment_decommit_delay); + slot->purge_mask = *purge_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_purge)) { + long delay = mi_option_get(mi_option_arena_purge_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); + mi_commit_mask_create_empty(&slot->purge_mask); } else { mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); diff --git a/src/segment.c b/src/segment.c index e4381e74..65b21d94 100644 --- a/src/segment.c +++ b/src/segment.c @@ -14,7 +14,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); +static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- @@ -258,7 +258,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -390,14 +390,14 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(end, os_pagesize); } - // purge delayed decommits now? (no, leave it to the cache) - // mi_segment_delayed_decommit(segment,true,tld->stats); + // purge delayed decommits now? (no, leave it to the arena) + // mi_segment_delayed_purge(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); #if MI_USE_SEGMENT_CACHE if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache - || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) + || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->purge_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) #endif { // if not all committed, an arena may decommit the whole area, but that double counts @@ -478,7 +478,7 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // commit liberal, but decommit conservative uint8_t* start = NULL; @@ -488,6 +488,7 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); @@ -496,41 +497,47 @@ static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, s mi_commit_mask_set(&segment->commit_mask, &mask); } else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + // purging mi_assert_internal((void*)start != (void*)segment); - //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); - - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - 
_mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (segment->allow_decommit) { - _mi_os_decommit(start, full_size, stats); // ok if this fails - } - mi_commit_mask_clear(&segment->commit_mask, &mask); + if (mi_option_is_enabled(mi_option_allow_purge)) { + if (segment->allow_decommit) { + const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit + if (decommitted) { + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + } + else if (segment->allow_purge) { + _mi_os_reset(start, full_size, stats); + } + } } // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + if (commit && mi_commit_mask_any_set(&segment->purge_mask, &mask)) { + segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } - // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, &mask); + // always undo delayed purges + mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow - if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed + if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); return mi_segment_commitx(segment,true,p,size,stats); } -static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (!segment->allow_decommit) return; - if (mi_option_get(mi_option_decommit_delay) == 0) { +static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (!segment->allow_purge) return; + if (mi_option_get(mi_option_purge_delay) == 0) { mi_segment_commitx(segment, false, p, size, stats); } else { - // register for future decommit in the decommit mask + // register for future purge in the purge mask uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; @@ -538,39 +545,39 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit - mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask)); mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more - mi_commit_mask_set(&segment->decommit_mask, &cmask); + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); 
// only purge what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); - if (segment->decommit_expire == 0) { + if (segment->purge_expire == 0) { // no previous decommits, initialize now - segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); + segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } - else if (segment->decommit_expire <= now) { + else if (segment->purge_expire <= now) { // previous decommit mask already expired - if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { - mi_segment_delayed_decommit(segment, true, stats); + if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { + mi_segment_delayed_purge(segment, true, stats); } else { - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else { // previous decommit mask is not yet expired, increase the expiration by a bit. - segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); + segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; +static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { + if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); - if (!force && now < segment->decommit_expire) return; + if (!force && now < segment->purge_expire) return; - mi_commit_mask_t mask = segment->decommit_mask; - segment->decommit_expire = 0; - mi_commit_mask_create_empty(&segment->decommit_mask); + mi_commit_mask_t mask = segment->purge_mask; + segment->purge_expire = 0; + mi_commit_mask_create_empty(&segment->purge_mask); size_t idx; size_t count; @@ -583,7 +590,7 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st } } mi_commit_mask_foreach_end() - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); } @@ -596,7 +603,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) { } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? 
NULL : mi_span_queue_for(slice_count,tld)); @@ -616,8 +623,8 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - if (allow_decommit) { - mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + if (allow_purge) { + mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); } // and push it on the free page queue (if it was not a huge page) @@ -794,7 +801,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id, size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, - mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask, + mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* ppurge_mask, bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -821,10 +828,10 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment #if MI_USE_SEGMENT_CACHE // get from cache? if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, ppurge_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } #else - MI_UNUSED(pdecommit_mask); + MI_UNUSED(ppurge_mask); #endif // get from OS @@ -886,13 +893,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi bool is_zero = false; mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; + mi_commit_mask_t purge_mask; mi_commit_mask_create_empty(&commit_mask); - mi_commit_mask_create_empty(&decommit_mask); + mi_commit_mask_create_empty(&purge_mask); // Allocate the segment from the OS mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask, + &segment_slices, &pre_size, &info_slices, &commit_mask, &purge_mask, &is_zero, &commit, tld, os_tld); if (segment == NULL) return NULL; @@ -908,21 +915,22 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - if (segment->allow_decommit) { - segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - segment->decommit_mask = decommit_mask; - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; + segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; + if (segment->allow_purge) { + segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); + segment->purge_mask = purge_mask; + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); #if MI_DEBUG>2 const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); mi_commit_mask_t commit_needed_mask; 
mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); + mi_assert_internal(!mi_commit_mask_any_set(&segment->purge_mask, &commit_needed_mask)); #endif } else { - segment->decommit_expire = 0; - mi_commit_mask_create_empty( &segment->decommit_mask ); + segment->purge_expire = 0; + mi_commit_mask_create_empty( &segment->purge_mask ); } // initialize segment info @@ -965,7 +973,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } else { mi_assert_internal(huge_page!=NULL); - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance @@ -1269,8 +1277,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // perform delayed decommits - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); + // perform delayed decommits (forcing is much slower on mstress) + mi_segment_delayed_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1459,7 +1467,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again + mi_segment_delayed_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1483,9 +1491,9 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // otherwise, decommit if needed and push on the visited list - // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. - mi_segment_delayed_decommit(segment, force, tld->stats); + // otherwise, purge if needed and push on the visited list + // note: forced purge can be expensive if many threads are destroyed/created as in mstress. 
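// [Editorial note, not part of the patch] mi_segment_delayed_purge only acts once the segment's
// purge_expire deadline has passed, unless forced; in outline (mirroring the function shown
// earlier in this commit):
//
//   if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return;
//   if (!force && _mi_clock_now() < segment->purge_expire) return;
//   // otherwise decommit/reset the slices recorded in purge_mask
//
// which is why the abandon path above forces it only when abandoned_page_purge is enabled,
// while this collect path passes its own `force` flag.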
+ mi_segment_delayed_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } @@ -1543,7 +1551,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); + mi_segment_delayed_purge(_mi_ptr_segment(page), false, tld->stats); return page; } From a9f42376b793449396bc5e2d430f40153fecbebc Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 16:17:02 -0700 Subject: [PATCH 212/352] small changes; make minimal commit most fine grained --- include/mimalloc/types.h | 2 +- src/alloc-aligned.c | 4 ++-- src/init.c | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 38b13883..ebbea391 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // most fine-grained #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index e79a2220..59f5a524 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -146,10 +146,10 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } -mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict inline void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if (!_mi_is_power_of_two(alignment)) return NULL; + if mi_unlikely(!_mi_is_power_of_two(alignment)) return NULL; if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments diff --git a/src/init.c b/src/init.c index 0b4325e3..9378d028 100644 --- a/src/init.c +++ b/src/init.c @@ -37,6 +37,7 @@ const mi_page_t _mi_page_empty = { #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) +#if (MI_SMALL_WSIZE_MAX==128) #if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #elif (MI_PADDING>0) @@ -44,7 +45,9 @@ const mi_page_t _mi_page_empty = { #else #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } #endif - +#else +#error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX" +#endif // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } From d22a13c990c8faf0031f7b463c02663bf9d96b8c Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 17:58:28 -0700 Subject: [PATCH 213/352] wip: purgeable arenas, various fixes --- include/mimalloc/types.h | 2 +- src/arena.c | 14 +++++++++++--- src/options.c | 2 +- src/prim/unix/prim.c | 16 ---------------- src/segment.c | 10 
+++++----- 5 files changed, 18 insertions(+), 26 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ebbea391..962535e3 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -350,7 +350,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) // most fine-grained +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS diff --git a/src/arena.c b/src/arena.c index 1f0dd2f8..57db9f7e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -182,7 +182,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); - if (commit_zero) *is_zero = true; + if (commit_zero) { *is_zero = true; } } } else { @@ -190,7 +190,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } - mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); + // mi_track_mem_undefined(p,needed_bcount*MI_ARENA_BLOCK_SIZE); return p; } @@ -297,7 +297,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? { mi_arena_id_t arena_id = 0; - const bool arena_commit = _mi_os_has_overcommit() || mi_option_is_enabled(mi_option_eager_arena_commit); + + bool arena_commit = _mi_os_has_overcommit(); + if (mi_option_get(mi_option_eager_arena_commit) == 1) { arena_commit = true; } + else if (mi_option_get(mi_option_eager_arena_commit) == 0) { arena_commit = false; } + if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; @@ -513,6 +517,9 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, return; } + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) + mi_track_mem_undefined(p,size); + // potentially decommit if (!arena->allow_decommit || arena->blocks_committed == NULL) { mi_assert_internal(all_committed); @@ -523,6 +530,7 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, if (!all_committed) { // assume the entire range as no longer committed _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_track_mem_noaccess(p,size); } // (delay) purge the entire range mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); diff --git a/src/options.c b/src/options.c index 44eee4b6..ca8bf5d8 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
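// [Editorial note, not part of the patch] The arena.c change above makes eager_arena_commit a
// tri-state option (renamed to arena_eager_commit in the following commit): with the default of
// 2 in the table below it resolves to "auto", i.e. commit arena memory eagerly only when the OS
// overcommits:
//
//   bool arena_commit = _mi_os_has_overcommit();                          // auto (value 2)
//   if (mi_option_get(mi_option_eager_arena_commit) == 1)      { arena_commit = true;  }
//   else if (mi_option_get(mi_option_eager_arena_commit) == 0) { arena_commit = false; }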
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 2, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 09c76f90..8d027ebb 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -342,22 +342,6 @@ static void unix_mprotect_hint(int err) { int _mi_prim_commit(void* start, size_t size, bool commit) { - /* - #if 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) - if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } - } - #else - */ int err = 0; if (commit) { // commit: ensure we can access the area diff --git a/src/segment.c b/src/segment.c index 65b21d94..63e47742 100644 --- a/src/segment.c +++ b/src/segment.c @@ -756,7 +756,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(slice) + slice_count; size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + mi_segment_span_free(segment, next_index, next_count, false /* don't purge left-over part */, tld); slice->slice_count = (uint32_t)slice_count; } @@ -915,7 +915,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; + segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; if (segment->allow_purge) { segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); @@ -969,7 +969,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't purge */, tld); } else { mi_assert_internal(huge_page!=NULL); @@ -1585,7 +1585,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_assert_internal(psize - (aligned_p - start) >= size); uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_os_decommit(decommit_start, 
decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + _mi_os_reset(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } return page; @@ -1630,7 +1630,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc if (segment->allow_decommit) { const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + _mi_os_reset(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } } #endif From 33d7503fdb1b9fbe047756309455f4223eab55dd Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 3 Apr 2023 19:57:26 -0700 Subject: [PATCH 214/352] rename to arena_eager_commit --- include/mimalloc.h | 2 +- src/arena.c | 4 ++-- src/options.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 6ade2e96..cb408acc 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -324,7 +324,7 @@ typedef enum mi_option_e { // some of the following options are experimental // (deprecated options are kept for binary backward compatibility with v1.x versions) mi_option_eager_commit, - mi_option_eager_arena_commit, + mi_option_arena_eager_commit, mi_option_purge_decommits, mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup diff --git a/src/arena.c b/src/arena.c index 57db9f7e..ca4c87a3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -299,8 +299,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset mi_arena_id_t arena_id = 0; bool arena_commit = _mi_os_has_overcommit(); - if (mi_option_get(mi_option_eager_arena_commit) == 1) { arena_commit = true; } - else if (mi_option_get(mi_option_eager_arena_commit) == 0) { arena_commit = false; } + if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + else if (mi_option_get(mi_option_arena_eager_commit) == 0) { arena_commit = false; } if (mi_reserve_os_memory_ex(arena_reserve, arena_commit /* commit */, *large /* allow large*/, false /* exclusive */, &arena_id) == 0) { p = mi_arena_alloc_in(arena_id, numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); diff --git a/src/options.c b/src/options.c index ca8bf5d8..3eeccaae 100644 --- a/src/options.c +++ b/src/options.c @@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 2, UNINIT, MI_OPTION_LEGACY(eager_arena_commit,eager_region_commit) }, + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages From 09297ba8cf7d8dd8429440acfcf326754cc58a5a Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 11:46:02 -0700 Subject: [PATCH 215/352] wip: purgeable arenas --- src/arena.c | 7 ++-- src/options.c | 4 +- src/os.c | 5 ++- src/segment.c | 101 ++++++++++++++++++++++++++++---------------------- src/stats.c | 4 +- 5 files changed, 68 insertions(+), 53 deletions(-) diff --git a/src/arena.c b/src/arena.c index ca4c87a3..134a6227 100644 --- a/src/arena.c +++ b/src/arena.c @@ -163,7 +163,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren // none of the claimed blocks should be scheduled for a decommit if (arena->blocks_purge != NULL) { - // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `in_use`). + // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`). _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index); } @@ -176,7 +176,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren *commit = true; } else if (*commit) { - // arena not committed as a whole, but commit requested: ensure commit now + // commit requested, but the range may not be committed as a whole: ensure it is committed now bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { @@ -294,7 +294,8 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); if (arena_reserve > 0 && arena_reserve >= size && // eager reserve enabled and large enough? req_arena_id == _mi_arena_id_none() && // not exclusive? - mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) ) // not too many arenas already? + mi_atomic_load_relaxed(&mi_arena_count) < 3*(MI_MAX_ARENAS/4) && // not too many arenas already? + !_mi_preloading() ) // and not before main runs { mi_arena_id_t arena_id = 0; diff --git a/src/options.c b/src/options.c index 3eeccaae..bb11b6a5 100644 --- a/src/options.c +++ b/src/options.c @@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] = // Some of the following options are experimental and not all combinations are valid. Use with care. 
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, - { 0, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N @@ -72,8 +72,6 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - #elif defined(_WIN32) - { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif diff --git a/src/os.c b/src/os.c index 20c6f8eb..f54e2513 100644 --- a/src/os.c +++ b/src/os.c @@ -437,7 +437,10 @@ bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stat */ // either resets or decommits memory, returns true if the memory was decommitted. -bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) { +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) +{ + if (!mi_option_is_enabled(mi_option_allow_purge)) return false; + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? !_mi_preloading()) // don't decommit during preloading (unsafe) { diff --git a/src/segment.c b/src/segment.c index 63e47742..5a324adb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- @@ -391,7 +391,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { } // purge delayed decommits now? (no, leave it to the arena) - // mi_segment_delayed_purge(segment,true,tld->stats); + // mi_segment_try_purge(segment,true,tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); @@ -476,49 +476,32 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_commit_mask_create(bitidx, bitcount, cm); } - -static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { +static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); - // commit liberal, but decommit conservative + // commit liberal uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; - mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); - if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + mi_segment_commit_mask(segment, false /* conservative? 
*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size == 0) return true; - if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + if (!mi_commit_mask_all_set(&segment->commit_mask, &mask)) { // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - mi_commit_mask_set(&segment->commit_mask, &mask); + if (!_mi_os_commit(start, full_size, &is_zero, stats)) return false; + mi_commit_mask_set(&segment->commit_mask, &mask); } - else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { - // purging - mi_assert_internal((void*)start != (void*)segment); - if (mi_option_is_enabled(mi_option_allow_purge)) { - if (segment->allow_decommit) { - const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit - if (decommitted) { - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting - mi_commit_mask_clear(&segment->commit_mask, &mask); - } - } - else if (segment->allow_purge) { - _mi_os_reset(start, full_size, stats); - } - } - } - // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->purge_mask, &mask)) { + + // increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon. + if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) { segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } - // always undo delayed purges + + // always clear any delayed purges in our range (as they are either committed now) mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } @@ -528,13 +511,43 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - return mi_segment_commitx(segment,true,p,size,stats); + return mi_segment_commit(segment, p, size, stats); +} + +static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); + if (!segment->allow_purge) return true; + + // purge conservative + uint8_t* start = NULL; + size_t full_size = 0; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, true /* conservative? 
*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + + if (mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + // purging + mi_assert_internal((void*)start != (void*)segment); + mi_assert_internal(segment->allow_decommit); + const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit + if (decommitted) { + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + } + + // always clear any scheduled purges in our range + mi_commit_mask_clear(&segment->purge_mask, &mask); + return true; } static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_purge) return; + if (mi_option_get(mi_option_purge_delay) == 0) { - mi_segment_commitx(segment, false, p, size, stats); + mi_segment_purge(segment, p, size, stats); } else { // register for future purge in the purge mask @@ -551,26 +564,26 @@ static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); if (segment->purge_expire == 0) { - // no previous decommits, initialize now + // no previous purgess, initialize now segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } else if (segment->purge_expire <= now) { - // previous decommit mask already expired + // previous purge mask already expired if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { - mi_segment_delayed_purge(segment, true, stats); + mi_segment_try_purge(segment, true, stats); } else { segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else { - // previous decommit mask is not yet expired, increase the expiration by a bit. + // previous purge mask is not yet expired, increase the expiration by a bit. 
segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } -static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->purge_expire) return; @@ -586,7 +599,7 @@ static void mi_segment_delayed_purge(mi_segment_t* segment, bool force, mi_stats if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); size_t size = count * MI_COMMIT_SIZE; - mi_segment_commitx(segment, false, p, size, stats); + mi_segment_purge(segment, p, size, stats); } } mi_commit_mask_foreach_end() @@ -916,7 +929,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed segment->allow_decommit = !segment->mem_is_pinned && !segment->mem_is_large; - segment->allow_purge = mi_option_is_enabled(mi_option_allow_purge) && segment->allow_decommit; + segment->allow_purge = segment->allow_decommit && mi_option_is_enabled(mi_option_allow_purge); if (segment->allow_purge) { segment->purge_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_purge_delay); segment->purge_mask = purge_mask; @@ -1278,7 +1291,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { } // perform delayed decommits (forcing is much slower on mstress) - mi_segment_delayed_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); + mi_segment_try_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1467,7 +1480,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again + mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1493,7 +1506,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) else { // otherwise, purge if needed and push on the visited list // note: forced purge can be expensive if many threads are destroyed/created as in mstress. 
- mi_segment_delayed_purge(segment, force, tld->stats); + mi_segment_try_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } @@ -1551,7 +1564,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_purge(_mi_ptr_segment(page), false, tld->stats); + mi_segment_try_purge(_mi_ptr_segment(page), false, tld->stats); return page; } diff --git a/src/stats.c b/src/stats.c index d2a31681..cc87513d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -111,7 +111,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); - + mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); @@ -331,7 +331,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); - mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); From 9f0da5c1951aec25c432dff013c16e4f09244efd Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Apr 2023 16:48:02 -0700 Subject: [PATCH 216/352] merge ide files --- ide/vs2022/mimalloc-override.vcxproj | 3 --- ide/vs2022/mimalloc.vcxproj | 3 --- 2 files changed, 6 deletions(-) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 5c0513c6..52ed5282 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -257,11 +257,8 @@ -<<<<<<< HEAD -======= ->>>>>>> dev-reset diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 36100a79..33a719c1 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -235,11 +235,8 @@ -<<<<<<< HEAD -======= ->>>>>>> dev-reset From 96b02dda1ff02db716b48d86e60fcf67f3593b45 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 8 Apr 2023 17:55:07 -0700 Subject: [PATCH 217/352] fix accidental cmake move --- cmake/JoinPaths.cmake | 23 +++++++++++++++++++++++ cmake/mimalloc-config-version.cmake | 19 +++++++++++++++++++ cmake/mimalloc-config.cmake | 14 ++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 cmake/JoinPaths.cmake create mode 100644 cmake/mimalloc-config-version.cmake create mode 100644 cmake/mimalloc-config.cmake diff --git a/cmake/JoinPaths.cmake b/cmake/JoinPaths.cmake new file mode 100644 index 00000000..c68d91b8 --- /dev/null +++ b/cmake/JoinPaths.cmake @@ -0,0 +1,23 @@ +# This module provides function for joining paths +# known from most languages +# +# SPDX-License-Identifier: (MIT OR CC0-1.0) +# Copyright 2020 Jan Tojnar +# https://github.com/jtojnar/cmake-snips +# +# Modelled after Python’s os.path.join +# 
https://docs.python.org/3.7/library/os.path.html#os.path.join +# Windows not supported +function(join_paths joined_path first_path_segment) + set(temp_path "${first_path_segment}") + foreach(current_segment IN LISTS ARGN) + if(NOT ("${current_segment}" STREQUAL "")) + if(IS_ABSOLUTE "${current_segment}") + set(temp_path "${current_segment}") + else() + set(temp_path "${temp_path}/${current_segment}") + endif() + endif() + endforeach() + set(${joined_path} "${temp_path}" PARENT_SCOPE) +endfunction() diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake new file mode 100644 index 00000000..a44c121d --- /dev/null +++ b/cmake/mimalloc-config-version.cmake @@ -0,0 +1,19 @@ +set(mi_version_major 2) +set(mi_version_minor 1) +set(mi_version_patch 2) +set(mi_version ${mi_version_major}.${mi_version_minor}) + +set(PACKAGE_VERSION ${mi_version}) +if(PACKAGE_FIND_VERSION_MAJOR) + if("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "${mi_version_major}") + if ("${PACKAGE_FIND_VERSION_MINOR}" EQUAL "${mi_version_minor}") + set(PACKAGE_VERSION_EXACT TRUE) + elseif("${PACKAGE_FIND_VERSION_MINOR}" LESS "${mi_version_minor}") + set(PACKAGE_VERSION_COMPATIBLE TRUE) + else() + set(PACKAGE_VERSION_UNSUITABLE TRUE) + endif() + else() + set(PACKAGE_VERSION_UNSUITABLE TRUE) + endif() +endif() diff --git a/cmake/mimalloc-config.cmake b/cmake/mimalloc-config.cmake new file mode 100644 index 00000000..a49b02a2 --- /dev/null +++ b/cmake/mimalloc-config.cmake @@ -0,0 +1,14 @@ +include(${CMAKE_CURRENT_LIST_DIR}/mimalloc.cmake) +get_filename_component(MIMALLOC_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH) # one up from the cmake dir, e.g. /usr/local/lib/cmake/mimalloc-2.0 +get_filename_component(MIMALLOC_VERSION_DIR "${CMAKE_CURRENT_LIST_DIR}" NAME) +string(REPLACE "/lib/cmake" "/lib" MIMALLOC_LIBRARY_DIR "${MIMALLOC_CMAKE_DIR}") +if("${MIMALLOC_VERSION_DIR}" EQUAL "mimalloc") + # top level install + string(REPLACE "/lib/cmake" "/include" MIMALLOC_INCLUDE_DIR "${MIMALLOC_CMAKE_DIR}") + set(MIMALLOC_OBJECT_DIR "${MIMALLOC_LIBRARY_DIR}") +else() + # versioned + string(REPLACE "/lib/cmake/" "/include/" MIMALLOC_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}") + string(REPLACE "/lib/cmake/" "/lib/" MIMALLOC_OBJECT_DIR "${CMAKE_CURRENT_LIST_DIR}") +endif() +set(MIMALLOC_TARGET_DIR "${MIMALLOC_LIBRARY_DIR}") # legacy From e35e919ea43e63b29738ca94173b2252a7b3b6f1 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 13 Apr 2023 15:37:54 -0700 Subject: [PATCH 218/352] remove segment-cache as it is superseded by better arena management --- CMakeLists.txt | 1 - ide/vs2022/mimalloc-override.vcxproj | 1 - ide/vs2022/mimalloc.vcxproj | 1 - include/mimalloc/internal.h | 7 - src/heap.c | 4 - src/init.c | 1 - src/segment-cache.c | 277 --------------------------- src/segment.c | 36 +--- src/static.c | 1 - 9 files changed, 5 insertions(+), 324 deletions(-) delete mode 100644 src/segment-cache.c diff --git a/CMakeLists.txt b/CMakeLists.txt index de2689a3..2bcd1ef7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,6 @@ set(mi_sources src/page.c src/random.c src/segment.c - src/segment-cache.c src/segment-map.c src/stats.c src/prim/prim.c) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 52ed5282..e2c7f71d 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -257,7 +257,6 @@ - diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 33a719c1..2916483d 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ 
b/ide/vs2022/mimalloc.vcxproj @@ -235,7 +235,6 @@ - diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index ab8c0d28..f4a08a09 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -120,16 +120,9 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_o void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); -bool _mi_arena_memid_is_os_allocated(mi_memid_t memid); void _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats); bool _mi_arena_contains(const void* p); -// "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, mi_memid_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); -void _mi_segment_cache_free_all(mi_os_tld_t* tld); - // "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); diff --git a/src/heap.c b/src/heap.c index 9238812b..14c3d66c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -163,10 +163,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_segment_thread_collect(&heap->tld->segments); } - // decommit in global segment caches - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment - _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); - // collect regions on program-exit (or shared library unload) if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { _mi_arena_collect(false /* destroy arenas */, true /* force purge */, &heap->tld->stats); diff --git a/src/init.c b/src/init.c index 5fb1ae43..17dc2faf 100644 --- a/src/init.c +++ b/src/init.c @@ -632,7 +632,6 @@ static void mi_cdecl mi_process_done(void) { // or C-runtime termination code. if (mi_option_is_enabled(mi_option_destroy_on_exit)) { _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments _mi_arena_collect(true /* destroy (owned) arenas */, true /* purge the rest */, &_mi_heap_main_get()->tld->stats); } diff --git a/src/segment-cache.c b/src/segment-cache.c deleted file mode 100644 index 2aee27c6..00000000 --- a/src/segment-cache.c +++ /dev/null @@ -1,277 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- - Implements a cache of segments to avoid expensive OS calls and to reuse - the commit_mask to optimize the commit/decommit calls. - The full memory map of all segments is also implemented here. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "mimalloc/atomic.h" - -#include "./bitmap.h" // atomic bitmap - -// #define MI_CACHE_DISABLE 1 // define to completely disable the segment cache - -#define MI_CACHE_FIELDS (16) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit - -#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) -#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes - -typedef struct mi_cache_slot_s { - void* p; - mi_memid_t memid; - bool is_pinned; - mi_commit_mask_t commit_mask; - mi_commit_mask_t purge_mask; - _Atomic(mi_msecs_t) expire; -} mi_cache_slot_t; - -static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 - -static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! -static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; -static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free - -static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { - mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); -} - -mi_decl_noinline static void* mi_segment_cache_pop_ex( - bool all_suitable, - size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* purge_mask, bool large_allowed, - bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t _req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return NULL; -#else - - // only segment blocks - if (size != MI_SEGMENT_SIZE) return NULL; - - // numa node determines start field - const int numa_node = _mi_os_numa_node(tld); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot and make it unavailable - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - mi_arena_id_t req_arena_id = _req_arena_id; - mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - - if (large_allowed) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // no longer available but still in-use - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_pinned = slot->is_pinned; - *is_zero = false; - *commit_mask = slot->commit_mask; - *purge_mask = slot->purge_mask; - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - - // mark the slot as free again - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -#endif -} - - -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) -{ - return mi_segment_cache_pop_ex(false, size, commit_mask, purge_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) -{ - if (mi_commit_mask_is_empty(cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(cmask)) { - // decommit the whole in one call - _mi_os_decommit(p, total, stats); - } - else { - // decommit parts - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - size_t idx; - size_t count; - mi_commit_mask_foreach(cmask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - mi_commit_mask_create_empty(cmask); -} - -#define MI_MAX_PURGE_PER_PUSH (4) - -static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) -{ - MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_purge)) return; - mi_msecs_t now = _mi_clock_now(); - size_t purged = 0; - const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (visit_all ? 
0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); - for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways - // it was available, we claimed it (and made it unavailable) - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); - // we can now access it safely - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // safe read - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - // TODO: instead of decommit, we could also free to the OS? - mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->purge_mask); - } - _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push - } - } -} - -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - if (force) { - // called on `mi_collect(true)` but not on thread termination - _mi_segment_cache_free_all(tld); - } - else { - mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); - } -} - -void _mi_segment_cache_free_all(mi_os_tld_t* tld) { - mi_commit_mask_t commit_mask; - mi_commit_mask_t purge_mask; - bool is_pinned; - bool is_zero; - bool is_large; - mi_memid_t memid; - const size_t size = MI_SEGMENT_SIZE; - void* p; - do { - // keep popping and freeing the memory - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &purge_mask, - true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); -} - -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, mi_memid_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return false; -#else - - // purge expired entries - mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - - // only cache normal segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // Also do not cache arena allocated segments that cannot be decommitted. 
(as arena allocation is fast) - // This is a common case with reserved huge OS pages. - // - // (note: we could also allow segments that are already fully decommitted but that never happens - // as the first slice is always committed (for the segment metadata)) - if (!_mi_arena_memid_is_os_allocated(memid) && is_pinned) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); -#if MI_DEBUG>1 - if (is_pinned || is_large) { - mi_assert_internal(mi_commit_mask_is_full(commit_mask)); - } -#endif - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - slot->is_pinned = is_pinned; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = *commit_mask; - slot->purge_mask = *purge_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_purge)) { - long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->purge_mask); - } - else { - mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx); - return true; -#endif -} diff --git a/src/segment.c b/src/segment.c index af4ed95c..7d0d2c28 100644 --- a/src/segment.c +++ b/src/segment.c @@ -11,7 +11,6 @@ terms of the MIT license. A copy of the license can be found in the file #include // memset #include -#define MI_USE_SEGMENT_CACHE 0 #define MI_PAGE_HUGE_ALIGN (256*1024) static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); @@ -393,28 +392,11 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { // purge delayed decommits now? (no, leave it to the arena) // mi_segment_try_purge(segment,true,tld->stats); - // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); -#if MI_USE_SEGMENT_CACHE - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE // only push regular segments on the cache - || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->purge_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) -#endif - { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - /* - // if not all committed, an arena may decommit the whole area, but that double counts - // the already decommitted parts; adjust for that in the stats. 
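For context, a minimal usage sketch of what this patch implies for callers — hedged: it assumes only the public `mimalloc.h` API (`mi_option_set`, `mi_malloc`, `mi_free`, `mi_collect`, `mi_stats_print`), none of it introduced by this patch. With the segment cache removed, a freed segment is returned directly to the arena, and its memory is purged either after roughly `mi_option_purge_delay` milliseconds or immediately on a forced collect:

  #include <mimalloc.h>

  int main(void) {
    mi_option_set(mi_option_purge_delay, 10); // purge freed memory ~10ms after free
    void* p = mi_malloc(8 * 1024 * 1024);     // large allocation, served via a segment/arena block
    mi_free(p);                               // the segment returns to the arena (no segment cache anymore)
    mi_collect(true);                         // force: run any delayed purges now
    mi_stats_print(NULL);                     // inspect commit/purge statistics
    return 0;
  }

This sketch only illustrates the intended behavior; the exact purge timing depends on the option defaults in this version.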
- if (!mi_commit_mask_is_full(&segment->commit_mask)) { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - mi_assert_internal(size > csize); - if (size > csize) { - _mi_stat_increase(&_mi_stats_main.committed, size - csize); - } - } - */ - _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, csize, tld->stats); - } + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, csize, tld->stats); } // called by threads that are terminating @@ -819,6 +801,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + MI_UNUSED(ppurge_mask); mi_memid_t memid; bool mem_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; @@ -837,15 +820,6 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment } const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; - - #if MI_USE_SEGMENT_CACHE - // get from cache? - if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, ppurge_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); - } - #else - MI_UNUSED(ppurge_mask); - #endif // get from OS if (segment==NULL) { diff --git a/src/static.c b/src/static.c index 831e9ecd..bc05dd72 100644 --- a/src/static.c +++ b/src/static.c @@ -32,7 +32,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "page.c" // includes page-queue.c #include "random.c" #include "segment.c" -#include "segment-cache.c" #include "segment-map.c" #include "stats.c" #include "prim/prim.c" From 66aa7a17ace23315f2b1e44c9b9cfd874ac67af1 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 09:13:08 -0700 Subject: [PATCH 219/352] further fix for atomic build error suggested by Davide Di Gennaro (issue #729, pr #724) --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 1951b470..130ef820 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -281,7 +281,7 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t; static inline bool mi_atomic_once( mi_atomic_once_t* once ) { if (mi_atomic_load_relaxed(once) != 0) return false; // quick test uintptr_t expected = 0; - return mi_atomic_cas_strong_acq_rel(once, &expected, 1UL); // try to set to 1 + return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 } // Yield From 6d42f2ac390e70424ffd402449a50c74079fd455 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 11:35:51 -0700 Subject: [PATCH 220/352] increase pipeline timeout to 10min for tsan --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c0f7ec74..1804be26 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -121,7 +121,7 @@ jobs: cmakeArgs: .. 
$(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 300 + - script: ctest --verbose --timeout 600 workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) From 015aac05a5938ec82a45e5fe101de2adb6f7f6a8 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Apr 2023 12:24:51 -0700 Subject: [PATCH 221/352] keep tsan enabled for dev-slice --- azure-pipelines.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cb9b08fe..0247c76f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -108,12 +108,11 @@ jobs: CXX: clang++ BuildType: debug-ubsan-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_DEBUG_UBSAN=ON - # Disable for now as it times out on the azure build machines - # Debug TSAN Clang++: - # CC: clang - # CXX: clang++ - # BuildType: debug-tsan-clang-cxx - # cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON + Debug TSAN Clang++: + CC: clang + CXX: clang++ + BuildType: debug-tsan-clang-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON steps: - task: CMake@1 From 95c2059e89b75da2507184123a9aee15921c0788 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 23 Apr 2023 19:48:49 -0700 Subject: [PATCH 222/352] fix asan mem tracking --- src/segment.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/segment.c b/src/segment.c index 442b187a..a80c36ac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -877,18 +877,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi &segment_slices, &pre_size, &info_slices, commit, tld, os_tld); if (segment == NULL) return NULL; - // zero the segment info? -- not always needed as it may be zero initialized from the OS - ptrdiff_t ofs = offsetof(mi_segment_t, next); - size_t prefix = offsetof(mi_segment_t, slices) - ofs; - size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more + // zero the segment info? -- not always needed as it may be zero initialized from the OS if (!segment->memid.was_zero) { + ptrdiff_t ofs = offsetof(mi_segment_t, next); + size_t prefix = offsetof(mi_segment_t, slices) - ofs; + size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more _mi_memzero((uint8_t*)segment + ofs, zsize); } - else { - mi_track_mem_defined((uint8_t*)segment + ofs,zsize); - mi_assert(mi_mem_is_zero((uint8_t*)segment + ofs, zsize)); - } - // initialize the rest of the segment info const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? 
MI_SLICES_PER_SEGMENT : segment_slices); From 5c9013302107d1f5f2f087f43975b49f0cd764e4 Mon Sep 17 00:00:00 2001 From: "microsoft-github-policy-service[bot]" <77245923+microsoft-github-policy-service[bot]@users.noreply.github.com> Date: Fri, 2 Jun 2023 17:40:26 +0000 Subject: [PATCH 223/352] Microsoft mandatory file --- SECURITY.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..e138ec5d --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
+ + From 280123bd5c84e25579172e5a86c4285b92347d65 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 1 Mar 2024 15:25:57 -0800 Subject: [PATCH 224/352] purge on page free as well --- src/segment.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/segment.c b/src/segment.c index eca50d46..8e14ac07 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1026,6 +1026,10 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // only abandoned pages; remove from free list and abandon mi_segment_abandon(segment,tld); } + else { + // perform delayed purges + mi_segment_try_purge(segment, false /* force? */, tld->stats); + } } @@ -1239,17 +1243,17 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld) { - *reclaimed = false; + *reclaimed = false; mi_segment_t* segment; mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap,¤t); - + // limit the tries to 10% (default) of the abandoned segments with at least 8 tries, and at most 1024. const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100); if (perc <= 0) return NULL; const size_t abandoned_count = _mi_arena_segment_abandoned_count(); const size_t relative_count = (abandoned_count > 10000 ? (abandoned_count / 100) * perc : (abandoned_count * perc) / 100); // avoid overflow long max_tries = (long)(relative_count < 8 ? 8 : (relative_count > 1024 ? 1024 : relative_count)); - while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) + while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) { segment->abandoned_visits++; // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments @@ -1288,7 +1292,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) { mi_segment_t* segment; mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, ¤t); - int max_tries = (force ? 16*1024 : 1024); // limit latency + int max_tries = (force ? 16*1024 : 1024); // limit latency while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) { mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) if (segment->used == 0) { From 5a2ed6d97762758ad4c349c110b263d474070dd9 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 2 Mar 2024 18:27:06 -0800 Subject: [PATCH 225/352] fix assertion --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 5c40c00e..7ada9b80 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1166,7 +1166,6 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s // Reclaim an abandoned segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { - mi_assert_expensive(mi_segment_is_valid(segment, tld)); if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } // can be 0 still with abandoned_next, or already a thread id for segments outside an arena that are reclaimed on a free. 
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id()); @@ -1216,6 +1215,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } mi_assert(segment->abandoned == 0); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); if (segment->used == 0) { // due to page_clear mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed)); mi_segment_free(segment, false, tld); From af3f2f9168fc0f2345c23d8c8b34a73563935834 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 22:32:32 -0700 Subject: [PATCH 226/352] fix assertions and warnings on gcc --- src/page-queue.c | 2 +- src/page.c | 4 ++-- src/segment.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/page-queue.c b/src/page-queue.c index 751caf95..470d1b64 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -141,7 +141,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* } #endif -static inline bool mi_page_is_large_or_huge(mi_page_t* page) { +static inline bool mi_page_is_large_or_huge(const mi_page_t* page) { return (mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_huge(page)); } diff --git a/src/page.c b/src/page.c index 808d863c..6bd53296 100644 --- a/src/page.c +++ b/src/page.c @@ -428,7 +428,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } -#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE +#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE #define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks @@ -455,7 +455,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = 1+(bsize <= MI_SMALL_OBJ_SIZE_MAX ? 
MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; diff --git a/src/segment.c b/src/segment.c index 2d29a5f3..e7843f37 100644 --- a/src/segment.c +++ b/src/segment.c @@ -601,7 +601,7 @@ static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* ----------------------------------------------------------- */ static bool mi_segment_is_abandoned(mi_segment_t* segment) { - return (segment->thread_id == 0); + return (mi_atomic_load_relaxed(&segment->thread_id) == 0); } // note: can be called on abandoned segments @@ -654,8 +654,8 @@ static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld // note: can be called on abandoned segments static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0); - mi_segment_t* segment = _mi_ptr_segment(slice); - bool is_abandoned = mi_segment_is_abandoned(segment); + mi_segment_t* const segment = _mi_ptr_segment(slice); + const bool is_abandoned = (segment->thread_id == 0); // mi_segment_is_abandoned(segment); // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { From 7c17c3d33ed03a5cc19144cb99e3a8030b1c7cdf Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 24 Mar 2024 22:41:33 -0700 Subject: [PATCH 227/352] optimize page struct layout --- include/mimalloc/types.h | 7 ++++--- src/init.c | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index b8e7f97d..a08555ee 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -292,14 +292,15 @@ typedef struct mi_page_s { // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory - uint16_t used; // number of blocks in use (including blocks in `thread_free`) mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized uint8_t retire_expire:7; // expiration count for retired blocks - // padding + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + uint16_t used; // number of blocks in use (including blocks in `thread_free`) + uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) + // padding size_t block_size; // size available in each block (always `>0`) uint8_t* page_start; // start of the page area containing the blocks diff --git a/src/init.c b/src/init.c index 1a6a30e5..33161062 100644 --- a/src/init.c +++ b/src/init.c @@ -14,17 +14,17 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, + 0, false, false, false, false, 0, // capacity 0, // reserved capacity - 0, // used { 0 }, // flags - 0, // block size shift false, // is_zero 0, // retire_expire NULL, // free NULL, // local_free + 0, // used + 0, // block size shift 0, // block_size NULL, // page_start #if (MI_PADDING || MI_ENCODE_FREELIST) From cc8d89a08528500572390a648a874bc705bd91b2 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 25 Mar 2024 07:35:49 -0700 Subject: [PATCH 228/352] update comments --- include/mimalloc/types.h | 57 ++++++++++++++++++++++++---------------- src/page.c | 2 +- src/segment.c | 20 +++++++------- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index a08555ee..4e96c5ec 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -13,9 +13,12 @@ terms of the MIT license. A copy of the license can be found in the file // mi_heap_t : all data for a thread-local heap, contains // lists of all managed heap pages. // mi_segment_t : a larger chunk of memory (32GiB) from where pages -// are allocated. -// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from +// are allocated. A segment is divided in slices (64KiB) from +// which pages are allocated. +// mi_page_t : a "mimalloc" page (usually 64KiB or 512KiB) from // where objects are allocated. +// Note: we always explicitly use "OS page" to refer to OS pages +// and just use "page" to refer to mimalloc pages (`mi_page_t`) // -------------------------------------------------------------------------- @@ -192,15 +195,15 @@ typedef int32_t mi_ssize_t; #error "mimalloc internal: define more bins" #endif -// Maximum slice offset (15) -#define MI_MAX_SLICE_OFFSET ((MI_BLOCK_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) - // blocks up to this size are always allocated aligned #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments #define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) +// Maximum slice count (255) for which we can find the page for interior pointers +#define MI_MAX_SLICE_OFFSET_COUNT ((MI_BLOCK_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) + // ------------------------------------------------------ // Mimalloc pages contain allocated blocks @@ -285,9 +288,9 @@ typedef struct mi_page_s { // "owned" by the segment uint32_t slice_count; // slices in this page (0 if not a page) uint32_t slice_offset; // distance from the actual page data slice (0 if a page) - uint8_t is_committed : 1; // `true` if the page virtual memory is committed - uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized - uint8_t is_huge:1; // `true` if the page is in a huge segment + uint8_t is_committed:1; // `true` if the page virtual memory is committed + uint8_t is_zero_init:1; // `true` if the page was initially zero initialized + uint8_t is_huge:1; // `true` if the page is in a huge segment (`segment->kind == MI_SEGMENT_HUGE`) // padding // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` @@ -328,12 +331,13 @@ typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment MI_PAGE_LARGE, // larger blocks go into a 
page of just one block - MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment. + MI_PAGE_HUGE, // huge blocks (> `MI_LARGE_OBJ_SIZE_MAX) or with alignment `> MI_BLOCK_ALIGNMENT_MAX` + // are put into a single page in a single `MI_SEGMENT_HUGE` segment. } mi_page_kind_t; typedef enum mi_segment_kind_e { MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. - MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. + MI_SEGMENT_HUGE, // segment with just one huge page inside. } mi_segment_kind_t; // ------------------------------------------------------ @@ -404,39 +408,48 @@ typedef struct mi_memid_s { } mi_memid_t; -// Segments are large allocated memory blocks (8mb on 64 bit) from -// the OS. Inside segments we allocated fixed size _pages_ that -// contain blocks. +// Segments are large allocated memory blocks (8mb on 64 bit) from arenas or the OS. +// +// Inside segments we allocated fixed size mimalloc pages (`mi_page_t`) that contain blocks. +// The start of a segment is this structure with a fixed number of slice entries (`slices`) +// usually followed by a guard OS page and the actual allocation area with pages. +// While a page is not allocated, we view it's data as a `mi_slice_t` (instead of a `mi_page_t`). +// Of any free area, the first slice has the info and `slice_offset == 0`; for any subsequent +// slices part of the area, the `slice_offset` is the byte offset back to the first slice +// (so we can quickly find the page info on a free, `internal.h:_mi_segment_page_of`). +// For slices, the `block_size` field is repurposed to signify if a slice is used (`1`) or not (`0`). +// Small and medium pages use a fixed amount of slices to reduce slice fragmentation, while +// large and huge pages span a variable amount of slices. typedef struct mi_segment_s { // constant fields - mi_memid_t memid; // memory id for arena allocation - bool allow_decommit; - bool allow_purge; + mi_memid_t memid; // memory id for arena/OS allocation + bool allow_decommit; // can we decommmit the memory + bool allow_purge; // can we purge the memory (reset or decommit) size_t segment_size; // segment fields - mi_msecs_t purge_expire; - mi_commit_mask_t purge_mask; - mi_commit_mask_t commit_mask; + mi_msecs_t purge_expire; // purge slices in the `purge_mask` after this time + mi_commit_mask_t purge_mask; // slices that can be purged + mi_commit_mask_t commit_mask; // slices that are currently committed // from here is zero initialized struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation) size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t abandoned_visits; // count how often this segment is visited during abondoned reclamation (to force reclaim if it takes too long) size_t used; // count of pages in use uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` - size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. + size_t segment_info_slices; // initial count of slices that we are using for segment info and possible guard pages. 
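  // A minimal sketch of the slice invariants described in the comment above (illustrative
  // only; `slice`, `first` and `in_use` are hypothetical names, not part of this patch).
  // An interior slice stores the byte offset back to the first slice of its span, and the
  // repurposed `block_size` field tells whether that span is a used page or a free span:
  //   mi_slice_t* first = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); // first slice of the span
  //   bool in_use = (first->block_size > 0);                                    // 0 = free span, > 0 = page in use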
// layout like this to optimize access in `mi_free` mi_segment_kind_t kind; size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment - mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one extra final entry for huge blocks with large alignment } mi_segment_t; diff --git a/src/page.c b/src/page.c index 6bd53296..05de541a 100644 --- a/src/page.c +++ b/src/page.c @@ -455,7 +455,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 1+(bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; diff --git a/src/segment.c b/src/segment.c index e7843f37..1d2f1e47 100644 --- a/src/segment.c +++ b/src/segment.c @@ -11,7 +11,11 @@ terms of the MIT license. A copy of the license can be found in the file #include // memset #include -#define MI_PAGE_HUGE_ALIGN (256*1024) +// ------------------------------------------------------------------- +// Segments +// mimalloc pages reside in segments. See `mi_segment_valid` for invariants. +// ------------------------------------------------------------------- + static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); @@ -146,10 +150,6 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) { /* -------------------------------------------------------------------------------- Segment allocation - - If a thread ends, it "abandons" pages with used blocks - and there is an abandoned segment list whose segments can - be reclaimed by still running threads, much like work-stealing. -------------------------------------------------------------------------------- */ @@ -268,10 +268,10 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_offset == 0); size_t index = mi_slice_index(slice); size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? 
segment->slice_entries : index + slice->slice_count) - 1; - if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets + if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET_COUNT valid back offsets used_count++; - if (segment->kind == MI_SEGMENT_HUGE) { mi_assert_internal(slice->is_huge); } - for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) { + mi_assert_internal(slice->is_huge == (segment->kind == MI_SEGMENT_HUGE)); + for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET_COUNT && index + i <= maxindex; i++) { mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t)); mi_assert_internal(i==0 || segment->slices[index + i].slice_count == 0); mi_assert_internal(i==0 || segment->slices[index + i].block_size == 1); @@ -720,9 +720,9 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i mi_page_t* page = mi_slice_to_page(slice); mi_assert_internal(mi_page_block_size(page) == bsize); - // set slice back pointers for the first MI_MAX_SLICE_OFFSET entries + // set slice back pointers for the first MI_MAX_SLICE_OFFSET_COUNT entries size_t extra = slice_count-1; - if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; + if (extra > MI_MAX_SLICE_OFFSET_COUNT) extra = MI_MAX_SLICE_OFFSET_COUNT; if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices mi_slice_t* slice_next = slice + 1; From a7c033caedef7af5e9d6f9a2318e2b8171c18215 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 19 Apr 2024 10:14:27 -0700 Subject: [PATCH 229/352] avoid unused warning --- src/alloc-aligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 0495c11d..b7e589ea 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -159,7 +159,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, // ensure a definition is emitted #if defined(__cplusplus) -static void* _mi_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned; +void* _mi_extern_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned; #endif // ------------------------------------------------------ From 6c5d6e1f721cd3eb369b93e4a1931d180a55a873 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 May 2024 17:24:52 -0700 Subject: [PATCH 230/352] fix max allocation size on 32-bit systems (issue #882) --- include/mimalloc/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 97438569..e2b9ce38 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -214,7 +214,7 @@ typedef int32_t mi_ssize_t; // we never allocate more than PTRDIFF_MAX (see also ) // on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. 
(issue #877) -#if PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX) +#if (PTRDIFF_MAX > INT32_MAX) && (PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX)) #define MI_MAX_ALLOC_SIZE (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1)) #else #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX From d824b9db2b339650b4dd04ffae5ede8abd84889c Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 13 May 2024 10:11:57 -0700 Subject: [PATCH 231/352] fix page collection where a freed segment could be accessed --- src/heap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/heap.c b/src/heap.c index 2fb04f7a..6c56edd6 100644 --- a/src/heap.c +++ b/src/heap.c @@ -95,6 +95,11 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= MI_FORCE); + if (collect == MI_FORCE) { + // note: call before a potential `_mi_page_free` as the segment may be freed if this was the last used page in that segment. + mi_segment_t* segment = _mi_page_segment(page); + _mi_segment_collect(segment, true /* force? */, &heap->tld->segments); + } if (mi_page_all_free(page)) { // no more used blocks, free the page. // note: this will free retired pages as well. @@ -104,10 +109,6 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t // still used blocks but the thread is done; abandon the page _mi_page_abandon(page, pq); } - if (collect == MI_FORCE) { - mi_segment_t* segment = _mi_page_segment(page); - _mi_segment_collect(segment, true /* force? */, &heap->tld->segments); - } return true; // don't break } From 44b65b19df9107c45147df31fcafedf135619411 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 16 May 2024 13:30:33 -0700 Subject: [PATCH 232/352] remove pre_size parameter for slices --- src/segment.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/segment.c b/src/segment.c index 9ac22f15..9e1b39a2 100644 --- a/src/segment.c +++ b/src/segment.c @@ -347,7 +347,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } -static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) { +static size_t mi_segment_calculate_slices(size_t required, size_t* info_slices) { size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; @@ -361,7 +361,6 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, siz } } - if (pre_size != NULL) *pre_size = isize; isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; size_t segment_size = (required==0 ? 
MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); @@ -808,7 +807,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren ----------------------------------------------------------- */ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id, - size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, + size_t* psegment_slices, size_t* pinfo_slices, bool commit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -825,7 +824,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN ); const size_t extra = align_offset - info_size; // recalculate due to potential guard pages - *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); + *psegment_slices = mi_segment_calculate_slices(required + extra, pinfo_slices); mi_assert_internal(*psegment_slices > 0 && *psegment_slices <= UINT32_MAX); } @@ -874,8 +873,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // calculate needed sizes first size_t info_slices; - size_t pre_size; - size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); + size_t segment_slices = mi_segment_calculate_slices(required, &info_slices); mi_assert_internal(segment_slices > 0 && segment_slices <= UINT32_MAX); // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) @@ -887,7 +885,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // Allocate the segment from the OS mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, commit, tld, os_tld); + &segment_slices, &info_slices, commit, tld, os_tld); if (segment == NULL) return NULL; // zero the segment info? -- not always needed as it may be zero initialized from the OS @@ -915,8 +913,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data, and at the end of the segment. - size_t os_pagesize = _mi_os_page_size(); - mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size); + size_t os_pagesize = _mi_os_page_size(); _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; mi_segment_ensure_committed(segment, end, os_pagesize, tld->stats); From 4a26a4568e0f593b7842d91fbf4ec5f80d06bc65 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 16 May 2024 14:26:05 -0700 Subject: [PATCH 233/352] fix out-of-bounds write on span free in huge segments --- src/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 9e1b39a2..6044c270 100644 --- a/src/segment.c +++ b/src/segment.c @@ -623,7 +623,9 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size mi_assert_internal(slice->slice_count == slice_count); // no overflow? 
slice->slice_offset = 0; if (slice_count > 1) { - mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + mi_slice_t* last = slice + slice_count - 1; + mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment); + if (last > end) { last = end; } last->slice_count = 0; last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); last->block_size = 0; From 3c5e480ce73b02cd8bd8eca0846b2baf930c265d Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 17 Jun 2024 16:21:46 -0700 Subject: [PATCH 234/352] fix alignment test --- src/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 869e05a8..bea43210 100644 --- a/src/segment.c +++ b/src/segment.c @@ -332,6 +332,8 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c if (block_size <= 64) { start_offset += 3*block_size; } else if (block_size <= 512) { start_offset += block_size; } } + mi_assert_internal(_mi_is_aligned(pstart + start_offset, MI_MAX_ALIGN_SIZE)); + mi_assert_internal(block_size == 0 || block_size > MI_MAX_ALIGN_GUARANTEE || _mi_is_aligned(pstart + start_offset,block_size)); if (page_size != NULL) { *page_size = psize - start_offset; } return (pstart + start_offset); } @@ -360,8 +362,6 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* info_slices) required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size; } } - mi_assert_internal(_mi_is_aligned(p, MI_MAX_ALIGN_SIZE)); - mi_assert_internal(block_size > MI_MAX_ALIGN_GUARANTEE || _mi_is_aligned(p,block_size)); isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; From 01503df7f3bb9bc46c74d67bc5060552f9f66ded Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 12 Aug 2024 13:51:39 -0700 Subject: [PATCH 235/352] move declaration to avoid gcc warning, see issue #919 --- src/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index bea43210..f8e98655 100644 --- a/src/segment.c +++ b/src/segment.c @@ -663,8 +663,7 @@ static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0); mi_segment_t* const segment = _mi_ptr_segment(slice); - const bool is_abandoned = (segment->thread_id == 0); // mi_segment_is_abandoned(segment); - + // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case) @@ -676,6 +675,7 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ } // otherwise coalesce the span and add to the free span queues + const bool is_abandoned = (segment->thread_id == 0); // mi_segment_is_abandoned(segment); size_t slice_count = slice->slice_count; mi_slice_t* next = slice + slice->slice_count; mi_assert_internal(next <= mi_segment_slices_end(segment)); From f163164d364557e341b8e32684502fcb0ee60b58 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 20 Aug 2024 13:12:51 -0700 Subject: [PATCH 236/352] ensure start-offset in a segment respects minimal alignment --- src/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment.c b/src/segment.c index f8e98655..1906e8ac 100644 --- a/src/segment.c +++ b/src/segment.c @@ 
-332,6 +332,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c if (block_size <= 64) { start_offset += 3*block_size; } else if (block_size <= 512) { start_offset += block_size; } } + start_offset = _mi_align_up(start_offset, MI_MAX_ALIGN_SIZE); mi_assert_internal(_mi_is_aligned(pstart + start_offset, MI_MAX_ALIGN_SIZE)); mi_assert_internal(block_size == 0 || block_size > MI_MAX_ALIGN_GUARANTEE || _mi_is_aligned(pstart + start_offset,block_size)); if (page_size != NULL) { *page_size = psize - start_offset; } From ad02086d3b45de030680b895762fa8a018edd07e Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 Aug 2024 17:07:01 -0700 Subject: [PATCH 237/352] remove default MI_DEBUG_GUARDED --- include/mimalloc/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 4540998f..69f737b3 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -75,7 +75,7 @@ terms of the MIT license. A copy of the license can be found in the file // Use guard pages behind objects of a certain size (set by the MIMALLOC_DEBUG_GUARDED_MIN/MAX options) // Padding should be disabled when using guard pages -#define MI_DEBUG_GUARDED 1 +// #define MI_DEBUG_GUARDED 1 #if defined(MI_DEBUG_GUARDED) #define MI_PADDING 0 #endif From 723869014ff71b12c585bf9b9b51ee4128d1b71f Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 9 Oct 2024 21:24:20 -0700 Subject: [PATCH 238/352] add ability to abandon segments after a threshold --- include/mimalloc.h | 1 + include/mimalloc/internal.h | 2 + include/mimalloc/types.h | 2 +- src/arena-abandon.c | 2 +- src/options.c | 1 + src/page.c | 21 ++++++++++ src/segment.c | 83 ++++++++++++++++++++++++++++++++++++- 7 files changed, 108 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index a5b3cc9d..df85a2c0 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -369,6 +369,7 @@ typedef enum mi_option_e { mi_option_visit_abandoned, // allow visiting heap blocks from abandoned threads (=0) mi_option_debug_guarded_min, // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects (=0) mi_option_debug_guarded_max, // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects (=0) + mi_option_target_segments_per_thread, // experimental (=0) _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index aff6a1bd..b4e74789 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -175,6 +175,8 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; / void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... 
+void _mi_page_force_abandon(mi_page_t* page); + void _mi_heap_delayed_free_all(mi_heap_t* heap); bool _mi_heap_delayed_free_partial(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 69f737b3..044d6eae 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -200,7 +200,7 @@ typedef int32_t mi_ssize_t; #define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 8KiB on 64-bit #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 16MiB on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // Maximum number of size classes. (spaced exponentially in 12.5% increments) diff --git a/src/arena-abandon.c b/src/arena-abandon.c index eaa8c7c9..84b9f72c 100644 --- a/src/arena-abandon.c +++ b/src/arena-abandon.c @@ -192,7 +192,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool vi else { // otherwise visit all starting at a random location if (abandoned_count > abandoned_list_count && max_arena > 0) { - current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->start = 0; // (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); current->end = current->start + max_arena; } else { diff --git a/src/options.c b/src/options.c index 1cfb2f17..c97b9abe 100644 --- a/src/options.c +++ b/src/options.c @@ -100,6 +100,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 0, UNINIT, MI_OPTION(debug_guarded_min) }, // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects { 0, UNINIT, MI_OPTION(debug_guarded_max) }, // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects + { 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable. 
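  // Usage sketch for the new option (illustrative, not part of this patch): it can be set
  // programmatically with the existing `mi_option_set` API, or - assuming the usual
  // MIMALLOC_ environment prefix for option names - before startup:
  //   mi_option_set(mi_option_target_segments_per_thread, 8); // start abandoning segments above ~8 per thread
  //   // or: MIMALLOC_TARGET_SEGMENTS_PER_THREAD=8 ./myprogram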
}; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/page.c b/src/page.c index 99ef3835..5671c7d4 100644 --- a/src/page.c +++ b/src/page.c @@ -405,6 +405,27 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { } +// force abandon a page; this is safe to call +void _mi_page_force_abandon(mi_page_t* page) { + mi_heap_t* heap = mi_page_heap(page); + // mark page as not using delayed free + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + + // ensure this page is no longer in the heap delayed free list + _mi_heap_delayed_free_all(heap); + if (page->block_size == 0) return; // it may have been freed now + + // and now unlink it from the page queue and abandon (or free) + mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); + if (mi_page_all_free(page)) { + _mi_page_free(page, pq, false); + } + else { + _mi_page_abandon(page, pq); + } +} + + // Free a page with no more free blocks void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_internal(page != NULL); diff --git a/src/segment.c b/src/segment.c index 1f1dc006..bb7483f1 100644 --- a/src/segment.c +++ b/src/segment.c @@ -693,6 +693,8 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // free previous slice -- remove it from free and merge mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); slice_count += prev->slice_count; + slice->slice_count = 0; + slice->slice_offset = (uint32_t)((uint8_t*)slice - (uint8_t*)prev); // set the slice offset for `segment_force_abandon` (in case the previous free block is very large). if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); } slice = prev; } @@ -1329,7 +1331,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice result = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); break; } - else if (segment->abandoned_visits > 3 && is_suitable) { + else if (segment->abandoned_visits > 3 && is_suitable && !mi_option_is_enabled(mi_option_target_segments_per_thread)) { // always reclaim on 3rd visit to limit the abandoned queue length. 
mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1343,7 +1345,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice return result; } - +// collect abandoned segments void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) { mi_segment_t* segment; @@ -1367,6 +1369,80 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) _mi_arena_field_cursor_done(¤t); } +/* ----------------------------------------------------------- + Force abandon a segment that is in use by our thread +----------------------------------------------------------- */ + +// force abandon a segment +static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) +{ + mi_assert_internal(!mi_segment_is_abandoned(segment)); + + // for all slices + const mi_slice_t* end; + mi_slice_t* slice = mi_slices_start_iterate(segment, &end); + while (slice < end) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (mi_slice_is_used(slice)) { + // ensure used count is up to date and collect potential concurrent frees + mi_page_t* const page = mi_slice_to_page(slice); + _mi_page_free_collect(page, false); + { + // abandon the page if it is still in-use (this will free it if possible as well) + mi_assert_internal(segment->used > 0); + if (segment->used == segment->abandoned+1) { + // the last page.. abandon and return as the segment will be abandoned after this + // and we should no longer access it. + _mi_page_force_abandon(page); + return; + } + else { + // abandon and continue + _mi_page_force_abandon(page); + // it might be freed, reset the slice (note: relies on coalesce setting the slice_offset) + slice = mi_slice_first(slice); + } + } + } + slice = slice + slice->slice_count; + } + mi_assert(segment->used == segment->abandoned); + mi_assert(segment->used == 0); + if (segment->used == 0) { + // all free now + mi_segment_free(segment, false, tld); + } + else { + // perform delayed purges + mi_segment_try_purge(segment, false /* force? */, tld->stats); + } +} + + +// try abandon segments. +// this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. +static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { + const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); + if (target == 0 || tld->count <= target) return; + + const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% + + // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages + for (int i = 0; i < 16 && tld->count >= min_target; i++) { + mi_page_t* page = heap->pages[MI_BIN_FULL].first; + while (page != NULL && mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX) { + page = page->next; + } + if (page==NULL) { + break; + } + mi_segment_t* segment = _mi_page_segment(page); + mi_segment_force_abandon(segment, tld); + mi_assert_internal(page != heap->pages[MI_BIN_FULL].first); // as it is just abandoned + } +} + /* ----------------------------------------------------------- Reclaim or allocate ----------------------------------------------------------- */ @@ -1375,6 +1451,9 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_ { mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); + // try to abandon some segments to increase reuse between threads + mi_segments_try_abandon(heap,tld); + // 1. 
try to reclaim an abandoned segment bool reclaimed; mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld); From 19ce2c6461ffa63583f57c2558e9c7f9979dadaa Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 11 Oct 2024 10:44:43 -0700 Subject: [PATCH 239/352] restore randomization when trying to reclaim abandoned segments --- src/arena-abandon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena-abandon.c b/src/arena-abandon.c index 84b9f72c..eaa8c7c9 100644 --- a/src/arena-abandon.c +++ b/src/arena-abandon.c @@ -192,7 +192,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool vi else { // otherwise visit all starting at a random location if (abandoned_count > abandoned_list_count && max_arena > 0) { - current->start = 0; // (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); current->end = current->start + max_arena; } else { From 81da26d7d30c87bc0f094c91fbbae39513d2d35a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 11 Oct 2024 10:52:35 -0700 Subject: [PATCH 240/352] make target test for stealing one less since we are about to reclaim_or_alloc a fresh segment --- src/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index bb7483f1..3d411f9c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1424,9 +1424,10 @@ static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* t // this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); - if (target == 0 || tld->count <= target) return; + // we call this when we are about to add a fresh segment so we should be under our target segment count. + if (target == 0 || tld->count < target) return; - const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% + const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages for (int i = 0; i < 16 && tld->count >= min_target; i++) { From eda16d7c918b3f172de95bf0453edde6d249a321 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 29 Oct 2024 20:07:35 -0700 Subject: [PATCH 241/352] remove wrong assertion --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index c55e63b1..ed1cf921 100644 --- a/src/options.c +++ b/src/options.c @@ -63,6 +63,7 @@ typedef struct mi_option_desc_s { #define MI_DEFAULT_ARENA_EAGER_COMMIT 2 #endif +// in KiB #ifndef MI_DEFAULT_ARENA_RESERVE #if (MI_INTPTR_SIZE>4) #define MI_DEFAULT_ARENA_RESERVE 1024L*1024L @@ -197,7 +198,6 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma } mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { - mi_assert_internal(mi_option_has_size_in_kib(option)); const long x = mi_option_get(option); size_t size = (x < 0 ? 
0 : (size_t)x); if (mi_option_has_size_in_kib(option)) { From 4f46cf7d5a0f7cbd30d0048babd3e67a4226ee53 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 29 Oct 2024 22:40:58 -0700 Subject: [PATCH 242/352] ensure we dont reclaim a segment on a free if that would go above the target segment count --- src/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/segment.c b/src/segment.c index 3d411f9c..66ac4bf7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1261,6 +1261,8 @@ bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) { if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned if (segment->subproc != heap->tld->segments.subproc) return false; // only reclaim within the same subprocess if (!_mi_heap_memid_is_suitable(heap,segment->memid)) return false; // don't reclaim between exclusive and non-exclusive arena's + const long target = _mi_option_get_fast(mi_option_target_segments_per_thread); + if (target > 0 && (size_t)target <= heap->tld->segments.count) return false; // don't reclaim if going above the target count // don't reclaim more from a `free` call than half the current segments // this is to prevent a pure free-ing thread to start owning too many segments // (but not for out-of-arena segments as that is the main way to be reclaimed for those) From 826425d5ab84f93dc8970ecea9f942d0e32689a0 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 2 Nov 2024 06:24:28 -0700 Subject: [PATCH 243/352] fix merge error, issue #955 --- test/main-override-static.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/main-override-static.c b/test/main-override-static.c index 07af1090..b2b6ee20 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -20,12 +20,9 @@ static void test_reserved(void); static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); -<<<<<<< HEAD static void test_heap_arena(void); static void test_align(void); -======= static void test_canary_leak(void); ->>>>>>> dev // static void test_large_pages(void); int main() { From c58990d4eb93ebe05699cc5e6fa1697a050213aa Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 22 Nov 2024 13:55:10 -0800 Subject: [PATCH 244/352] fix syntax error (issue #963) --- test/main-override-static.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/main-override-static.c b/test/main-override-static.c index b2b6ee20..ccaba543 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -252,6 +252,8 @@ static void test_heap_arena(void) { break; } } +} + static void test_canary_leak(void) { char* p = mi_mallocn_tp(char,23); for(int i = 0; i < 23; i++) { From 9c5c628f990735ffc2f626b1cb6d8f26cf8c4701 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 20 Dec 2024 12:58:46 -0800 Subject: [PATCH 245/352] merge from dev --- ide/vs2022/mimalloc-test.vcxproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj index a8b36d5e..6e4576fd 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -272,14 +272,14 @@ Console + + + {abb5eae7-b3e6-432e-b636-333449892ea6} - - - From 34cdf1a49f092b90b76bbc6a71cb743e1f1985c6 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 25 Dec 2024 13:56:38 -0800 Subject: [PATCH 246/352] fix eager delayed setting --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index a93ea218..1390e77a 100644 --- 
a/src/segment.c +++ b/src/segment.c @@ -886,7 +886,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) const bool eager_delay = (// !_mi_os_has_overcommit() && // never delay on overcommit systems _mi_current_thread_count() > 1 && // do not delay for the first N threads - tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); From 17dd7e1901e850229aaf131e26f21b99ab49714a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 3 Jan 2025 18:45:00 -0800 Subject: [PATCH 247/352] bump version to 2.1.9 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 3d94fafc..f3ed36ab 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 1) -set(mi_version_patch 8) +set(mi_version_patch 9) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index fc6c75fc..bd91db43 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 218 // major + 2 digits minor +#define MI_MALLOC_VERSION 219 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 34b5d3c7792ec42260a197a595e3bb6ba3344c00 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 Jan 2025 09:53:11 -0800 Subject: [PATCH 248/352] update vcpkg hash --- contrib/vcpkg/portfile.cmake | 4 ++-- contrib/vcpkg/readme.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index 058ce985..faa4c542 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -5,11 +5,11 @@ vcpkg_from_github( # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). # REF "v${VERSION}" - REF be05b232e8a51e076aae6d8f4a5c3049ce51cb01 + REF 191ea046e4213e1a59652b3f4975219115ce1bed # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) 
- SHA512 24f640db050d6263e557fe9d024e6c0435762118605c0d04801efbcb32e96382b0b995000715fc0c2dcd67c67825a100a6690ecf0ef097b0a3ae107a82d74f7d + SHA512 d35926d368eb89b1688fafe22192c44e349ae78553a3a8def78bca847adff8a29e388a92027f03bd4fb6d6b4c906c70e9fd962bd539fbc8ef383fac95f64d8cd ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS diff --git a/contrib/vcpkg/readme.md b/contrib/vcpkg/readme.md index b1f6047c..014f2867 100644 --- a/contrib/vcpkg/readme.md +++ b/contrib/vcpkg/readme.md @@ -9,7 +9,7 @@ to check out a specific commit, version, or branch of mimalloc, or set further o You can install such custom port as: ```sh -$ vcpkg install mimalloc[override] --recurse --overlay-ports=./contrib/vcpkg +$ vcpkg install "mimalloc[override]" --recurse --overlay-ports=./contrib/vcpkg ``` This will also show the correct sha512 hash if you use a custom version. From e2db21e9ba9fb9172b7b0aa0fe9b8742525e8774 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 13 Jan 2025 16:55:56 -0800 Subject: [PATCH 249/352] remove INTERFACE_INCLUDE_DIRECTORIES --- contrib/vcpkg/portfile.cmake | 4 ++-- contrib/vcpkg/vcpkg-cmake-wrapper.cmake | 1 - contrib/vcpkg/vcpkg.json | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index faa4c542..ca746763 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -5,11 +5,11 @@ vcpkg_from_github( # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). # REF "v${VERSION}" - REF 191ea046e4213e1a59652b3f4975219115ce1bed + REF 03e501bddbf99a7a0688437172d914d079bc445a # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) 
- SHA512 d35926d368eb89b1688fafe22192c44e349ae78553a3a8def78bca847adff8a29e388a92027f03bd4fb6d6b4c906c70e9fd962bd539fbc8ef383fac95f64d8cd + SHA512 77bc7459baf517d8facfa7b3165709e392066091a47a5fa60498e6d9f3dcb308bc047fa743849a40803264c2f2f6e4c19e8e3ae04689f98a816bd1d0eed79ede ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS diff --git a/contrib/vcpkg/vcpkg-cmake-wrapper.cmake b/contrib/vcpkg/vcpkg-cmake-wrapper.cmake index 6b917347..1b355722 100644 --- a/contrib/vcpkg/vcpkg-cmake-wrapper.cmake +++ b/contrib/vcpkg/vcpkg-cmake-wrapper.cmake @@ -17,5 +17,4 @@ endif() if(TARGET mimalloc-static AND NOT TARGET mimalloc) add_library(mimalloc INTERFACE IMPORTED) set_target_properties(mimalloc PROPERTIES INTERFACE_LINK_LIBRARIES mimalloc-static) - set_target_properties(mimalloc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES mimalloc-static) endif() diff --git a/contrib/vcpkg/vcpkg.json b/contrib/vcpkg/vcpkg.json index bdbe9ba1..9adfc4f8 100644 --- a/contrib/vcpkg/vcpkg.json +++ b/contrib/vcpkg/vcpkg.json @@ -1,7 +1,7 @@ { "name": "mimalloc", - "version": "1.9.2", - "port-version": 2, + "version": "2.2.2", + "port-version": 1, "description": "Compact general purpose allocator with excellent performance", "homepage": "https://github.com/microsoft/mimalloc", "license": "MIT", From 0ef19762fec360e87d63ce91751cd3778d10ff05 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 13 Jan 2025 16:57:17 -0800 Subject: [PATCH 250/352] bump vcpkg sha --- contrib/vcpkg/portfile.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index ca746763..9d1c2cd9 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -5,11 +5,11 @@ vcpkg_from_github( # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). # REF "v${VERSION}" - REF 03e501bddbf99a7a0688437172d914d079bc445a + REF e2db21e9ba9fb9172b7b0aa0fe9b8742525e8774 # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) 
- SHA512 77bc7459baf517d8facfa7b3165709e392066091a47a5fa60498e6d9f3dcb308bc047fa743849a40803264c2f2f6e4c19e8e3ae04689f98a816bd1d0eed79ede + SHA512 8cbb601fdf8b46dd6a9c0d314d6da9d4960699853829e96d2470753867f90689fb4caeaf30d628943fd388670dc11902dbecc9cc7c329b99a510524a09bdb612 ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS From a24d71f37418c709dc0f3bdaaab06e53a5d4ca1c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 2 Mar 2025 17:10:24 -0800 Subject: [PATCH 251/352] fix compile warning --- src/page.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index 5b9e7c40..8db2463f 100644 --- a/src/page.c +++ b/src/page.c @@ -943,8 +943,8 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t } else */ { - mi_heap_stat_increase(heap, malloc_huge, bsize); - mi_heap_stat_counter_increase(heap, malloc_huge_count, 1); + _mi_stat_increase(&heap->tld->stats.malloc_huge, bsize); + _mi_stat_counter_increase(&heap->tld->stats.malloc_huge_count, 1); } } return page; From 2fc6b14bab0090eb84dd1f7a3f24d1e67918e3ff Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 6 Mar 2025 21:03:51 -0800 Subject: [PATCH 252/352] bump version to 1.9.3 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 7f3bd631..aeea621f 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 9) -set(mi_version_patch 2) +set(mi_version_patch 3) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4e9c3156..8ccfcec3 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 192 // major + 2 digits minor +#define MI_MALLOC_VERSION 193 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 2b895f4e97aa089db3e9012708a5c26492cce88e Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 6 Mar 2025 21:04:32 -0800 Subject: [PATCH 253/352] bump version to 2.2.3 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 286ec0ba..daac7a5d 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 2) -set(mi_version_patch 2) +set(mi_version_patch 3) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index ae6ae262..ff6f0568 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 222 // major + 2 digits minor +#define MI_MALLOC_VERSION 223 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 4aae566191b9443d53995245b637ce28d617710a Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 10 Mar 2025 12:17:46 -0700 Subject: [PATCH 254/352] fix link error with msvc in C mode (issue #1030) --- ide/vs2022/mimalloc-lib.vcxproj | 2 +- include/mimalloc/atomic.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ide/vs2022/mimalloc-lib.vcxproj b/ide/vs2022/mimalloc-lib.vcxproj index abdac1d1..95b516ec 100644 --- a/ide/vs2022/mimalloc-lib.vcxproj +++ b/ide/vs2022/mimalloc-lib.vcxproj @@ -308,7 +308,7 @@ false false Default - CompileAsCpp + CompileAsC true stdcpp20 diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 6eaa6f99..2984f50f 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -266,6 +266,13 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int6 return current; #endif } +static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) { + const int64_t add = *padd; + if (add != 0) { + mi_atomic_addi64_relaxed((volatile _Atomic(int64_t)*)p, add); + } +} + static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { From f11732acdfe3e33f64f4aa3e7db657ffd80dea8f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 10 Mar 2025 12:39:09 -0700 Subject: [PATCH 255/352] set default compilation to c++ mode on msvc --- ide/vs2022/mimalloc-lib.vcxproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ide/vs2022/mimalloc-lib.vcxproj b/ide/vs2022/mimalloc-lib.vcxproj index 95b516ec..abdac1d1 100644 --- a/ide/vs2022/mimalloc-lib.vcxproj +++ b/ide/vs2022/mimalloc-lib.vcxproj @@ -308,7 +308,7 @@ false false Default - CompileAsC + CompileAsCpp true stdcpp20 From 9a35bca55645a131092a91797f851794423175f6 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 16:12:17 -0700 Subject: [PATCH 256/352] possible fix for wrong accounting of committed bytes (issue #1035) --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9d40a271..1f6f6d9d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -648,15 +648,16 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); + const bool decommitted_size = (committed_size <= size ? size - committed_size : 0); // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) 
mi_track_mem_undefined(p,size); if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through - if (!all_committed && committed_size > 0) { - // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) - _mi_stat_decrease(&_mi_stats_main.committed, committed_size); + if (!all_committed && decommitted_size > 0) { + // if partially committed, adjust the committed stats (as `_mi_os_free` will decrease commit by the full size) + _mi_stat_increase(&_mi_stats_main.committed, decommitted_size); } _mi_os_free(p, size, memid); } @@ -695,7 +696,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_track_mem_noaccess(p,size); if (committed_size > 0) { // if partially committed, adjust the committed stats (is it will be recommitted when re-using) - // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. + // in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed. _mi_stat_decrease(&_mi_stats_main.committed, committed_size); } // note: if not all committed, it may be that the purge will reset/decommit the entire range From 26fa8be42759ac39f7b4869b4e0936bd35a8be17 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 18:50:53 -0700 Subject: [PATCH 257/352] improved accounting of committed bytes (issue #1035) --- include/mimalloc/internal.h | 16 +++++++++++ src/arena.c | 39 ++++++++++++++++--------- src/bitmap.c | 28 +++++++++++------- src/bitmap.h | 4 +-- src/libc.c | 57 +++++++++++++++++++++++++++++++++++++ src/stats.c | 1 + 6 files changed, 119 insertions(+), 26 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 106da0d1..5b3e7e23 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -127,6 +127,7 @@ bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); bool _mi_os_commit(void* p, size_t size, bool* is_zero); +bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); @@ -947,6 +948,21 @@ static inline size_t mi_bsr(size_t x) { return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x)); } +size_t _mi_popcount_generic(size_t x); + +static inline size_t mi_popcount(size_t x) { + if (x<=1) return x; + if (x==SIZE_MAX) return MI_SIZE_BITS; + #if defined(__GNUC__) + #if (SIZE_MAX == ULONG_MAX) + return __builtin_popcountl(x); + #else + return __builtin_popcountll(x); + #endif + #else + return _mi_popcount_generic(x); + #endif +} // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. diff --git a/src/arena.c b/src/arena.c index 1f6f6d9d..a7c20764 100644 --- a/src/arena.c +++ b/src/arena.c @@ -255,7 +255,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar // set the dirty bits (todo: no need for an atomic op here?) 
if (arena->memid.initially_zero && arena->blocks_dirty != NULL) { - memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL); } // set commit state @@ -267,10 +267,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; bool any_uncommitted; - _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + size_t already_committed = 0; + _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed); if (any_uncommitted) { + mi_assert_internal(already_committed < needed_bcount); + const size_t commit_size = mi_arena_block_size(needed_bcount); + const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed); bool commit_zero = false; - if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) { + if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) { memid->initially_committed = false; } else { @@ -280,7 +284,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar } else { // no need to commit, but check if already fully committed - memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + size_t already_committed = 0; + memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed); + if (!memid->initially_committed && already_committed > 0) { + // partially committed: as it will be committed at some time, adjust the stats and pretend the range is fully uncommitted. + mi_assert_internal(already_committed < needed_bcount); + _mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed)); + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + } } return p; @@ -464,17 +475,19 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks) const size_t size = mi_arena_block_size(blocks); void* const p = mi_arena_block_start(arena, bitmap_idx); bool needs_recommit; - if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { + size_t already_committed = 0; + if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) { // all blocks are committed, we can purge freely + mi_assert_internal(already_committed == blocks); needs_recommit = _mi_os_purge(p, size); } else { // some blocks are not committed -- this can happen when a partially committed block is freed // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge - // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), - // and also undo the decommit stats (as it was already adjusted) + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory). 
+ mi_assert_internal(already_committed < blocks); mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed)); } // clear the purged blocks @@ -508,7 +521,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t else { // already an expiration was set } - _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); + _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL); } } @@ -648,7 +661,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); - const bool decommitted_size = (committed_size <= size ? size - committed_size : 0); + const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0); // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) mi_track_mem_undefined(p,size); @@ -691,14 +704,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_assert_internal(arena->blocks_purge != NULL); if (!all_committed) { - // mark the entire range as no longer committed (so we recommit the full range when re-using) + // mark the entire range as no longer committed (so we will recommit the full range when re-using) _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); mi_track_mem_noaccess(p,size); - if (committed_size > 0) { + //if (committed_size > 0) { // if partially committed, adjust the committed stats (is it will be recommitted when re-using) // in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed. _mi_stat_decrease(&_mi_stats_main.committed, committed_size); - } + //} // note: if not all committed, it may be that the purge will reset/decommit the entire range // that contains already decommitted parts. Since purge consistently uses reset or decommit that // works (as we should never reset decommitted parts). diff --git a/src/bitmap.c b/src/bitmap.c index 9ef784d6..50f4df2b 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -351,7 +351,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
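The `_across` helpers that follow operate on a run of bits that may straddle several bitmap fields, which is why they are driven by a pre/mid/post mask split. The self-contained sketch below shows one way such masks can be computed for 64-bit fields; it is an illustration only and does not reproduce mimalloc's `mi_bitmap_mask_across` exactly.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_BITS 64

// Compute the three masks for a run of `count` bits starting at absolute bit
// index `start`: a partial mask in the first field, a full mask repeated over
// any whole middle fields, and a partial mask in the last field.
static void mask_across(size_t start, size_t count,
                        uint64_t* pre, uint64_t* mid, uint64_t* post,
                        size_t* mid_fields) {
  size_t bit = start % FIELD_BITS;
  size_t in_first = FIELD_BITS - bit;
  if (count <= in_first) {               // the whole run fits in one field
    *pre = ((count == FIELD_BITS) ? ~UINT64_C(0) : ((UINT64_C(1) << count) - 1)) << bit;
    *mid = 0; *post = 0; *mid_fields = 0;
    return;
  }
  *pre = (~UINT64_C(0)) << bit;          // upper part of the first field
  size_t rest = count - in_first;
  *mid_fields = rest / FIELD_BITS;       // number of completely covered fields
  *mid = ~UINT64_C(0);
  size_t tail = rest % FIELD_BITS;
  *post = (tail == 0 ? 0 : (UINT64_C(1) << tail) - 1);
}

int main(void) {
  uint64_t pre, mid, post; size_t nmid;
  mask_across(60, 72, &pre, &mid, &post, &nmid);   // bits 60..131
  printf("pre=%016llx mid=%016llx x%zu post=%016llx\n",
         (unsigned long long)pre, (unsigned long long)mid, nmid,
         (unsigned long long)post);
  return 0;
}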
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; @@ -359,28 +359,31 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_zero = true; bool any_zero = false; + size_t one_count = 0; _Atomic(size_t)*field = &bitmap[idx]; size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); - if ((prev & pre_mask) != 0) all_zero = false; + if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); } if ((prev & pre_mask) != pre_mask) any_zero = true; while (mid_count-- > 0) { prev = mi_atomic_or_acq_rel(field++, mid_mask); - if ((prev & mid_mask) != 0) all_zero = false; + if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); } if ((prev & mid_mask) != mid_mask) any_zero = true; } if (post_mask!=0) { prev = mi_atomic_or_acq_rel(field, post_mask); - if ((prev & post_mask) != 0) all_zero = false; + if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); } if ((prev & post_mask) != post_mask) any_zero = true; } if (pany_zero != NULL) { *pany_zero = any_zero; } + if (already_set != NULL) { *already_set = one_count; }; + mi_assert_internal(all_zero ? one_count == 0 : one_count <= count); return all_zero; } // Returns `true` if all `count` bits were 1. // `any_ones` is `true` if there was at least one bit set to one. -static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { +static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; @@ -388,30 +391,33 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_ones = true; bool any_ones = false; + size_t one_count = 0; mi_bitmap_field_t* field = &bitmap[idx]; size_t prev = mi_atomic_load_relaxed(field++); if ((prev & pre_mask) != pre_mask) all_ones = false; - if ((prev & pre_mask) != 0) any_ones = true; + if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); } while (mid_count-- > 0) { prev = mi_atomic_load_relaxed(field++); if ((prev & mid_mask) != mid_mask) all_ones = false; - if ((prev & mid_mask) != 0) any_ones = true; + if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); } } if (post_mask!=0) { prev = mi_atomic_load_relaxed(field); if ((prev & post_mask) != post_mask) all_ones = false; - if ((prev & post_mask) != 0) any_ones = true; + if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); } } if (pany_ones != NULL) { *pany_ones = any_ones; } + if (already_set != NULL) { *already_set = one_count; } + mi_assert_internal(all_ones ? 
one_count == count : one_count < count); return all_ones; } -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) { + return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set); } bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { bool any_ones; - mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL); return any_ones; } diff --git a/src/bitmap.h b/src/bitmap.h index d60668cb..60b38815 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -102,9 +102,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. -bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set); -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set); bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); #endif diff --git a/src/libc.c b/src/libc.c index 1bd97aa3..52d095eb 100644 --- a/src/libc.c +++ b/src/libc.c @@ -275,3 +275,60 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) { va_end(args); return written; } + + +#if MI_SIZE_SIZE == 4 +#define mi_mask_even_bits32 (0x55555555) +#define mi_mask_even_pairs32 (0x33333333) +#define mi_mask_even_nibbles32 (0x0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! +static size_t mi_byte_sum32(uint32_t x) { + // perform `x * 0x01010101`: the highest byte contains the sum of all bytes. + x += (x << 8); + x += (x << 16); + return (size_t)(x >> 24); +} + +static size_t mi_popcount_generic32(uint32_t x) { + // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10 + // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair + // into the lower bit-pair: + x = x - ((x >> 1) & mi_mask_even_bits32); + // add the 2-bit pair results + x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32); + // add the 4-bit nibble results + x = (x + (x >> 4)) & mi_mask_even_nibbles32; + // each byte now has a count of its bits, we can sum them now: + return mi_byte_sum32(x); +} + +mi_decl_noinline size_t _mi_popcount_generic(size_t x) { + return mi_popcount_generic32(x); +} + +#else +#define mi_mask_even_bits64 (0x5555555555555555) +#define mi_mask_even_pairs64 (0x3333333333333333) +#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! 
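The generic popcount added in this patch is the classic SWAR bit count: fold each 2-bit pair, then each nibble, then sum the bytes with a shift-and-add cascade. A small hedged harness like the one below can cross-check such an implementation against a naive loop; `swar_popcount64` is a local restatement of the algorithm for the test, not the library's `_mi_popcount_generic`.

#include <stdint.h>
#include <stdio.h>

// SWAR popcount, same algorithm as the generic fallback in this patch.
static unsigned swar_popcount64(uint64_t x) {
  x = x - ((x >> 1) & 0x5555555555555555ULL);                          // 2-bit sums
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); // 4-bit sums
  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                          // byte sums
  x += (x << 8); x += (x << 16); x += (x << 32);                       // add all bytes
  return (unsigned)(x >> 56);                                          // sum < 256, fits the top byte
}

// Obviously-correct reference: count bits one at a time.
static unsigned naive_popcount64(uint64_t x) {
  unsigned n = 0;
  for (; x != 0; x >>= 1) n += (unsigned)(x & 1);
  return n;
}

int main(void) {
  uint64_t v = 0x123456789ABCDEF0ULL;
  for (int i = 0; i < 1000; i++) {
    if (swar_popcount64(v) != naive_popcount64(v)) {
      printf("mismatch at %016llx\n", (unsigned long long)v);
      return 1;
    }
    v = v * 6364136223846793005ULL + 1442695040888963407ULL;   // LCG step to vary input
  }
  puts("SWAR popcount matches the naive count on all sampled values");
  return 0;
}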
+static size_t mi_byte_sum64(uint64_t x) { + x += (x << 8); + x += (x << 16); + x += (x << 32); + return (size_t)(x >> 56); +} + +static size_t mi_popcount_generic64(uint64_t x) { + x = x - ((x >> 1) & mi_mask_even_bits64); + x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64); + x = (x + (x >> 4)) & mi_mask_even_nibbles64; + return mi_byte_sum64(x); +} + +mi_decl_noinline size_t _mi_popcount_generic(size_t x) { + return mi_popcount_generic64(x); +} +#endif + diff --git a/src/stats.c b/src/stats.c index 1cfc3104..6a480816 100644 --- a/src/stats.c +++ b/src/stats.c @@ -30,6 +30,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { { // add atomically (for abandoned pages) int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + // if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); }; mi_atomic_maxi64_relaxed(&stat->peak, current + amount); if (amount > 0) { mi_atomic_addi64_relaxed(&stat->total,amount); From 47bf3a5b1b5dd1f85a1ff75bb046f9f8e6dfcdb1 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:06:21 -0700 Subject: [PATCH 258/352] potential fix for sporadic assertion failure on random returning 0 (issue #1039) --- src/random.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/random.c b/src/random.c index 4fc8b2f8..f17698ba 100644 --- a/src/random.c +++ b/src/random.c @@ -143,13 +143,17 @@ void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { mi_assert_internal(mi_random_is_initialized(ctx)); - #if MI_INTPTR_SIZE <= 4 - return chacha_next32(ctx); - #elif MI_INTPTR_SIZE == 8 - return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); - #else - # error "define mi_random_next for this platform" - #endif + uintptr_t r; + do { + #if MI_INTPTR_SIZE <= 4 + r = chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + r = (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif + } while (r==0); + return r; } @@ -163,7 +167,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { + for (uintptr_t i = 0; i < max || x==0; i++, x++) { x = _mi_random_shuffle(x); } mi_assert_internal(x != 0); @@ -179,7 +183,7 @@ static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); } #endif uintptr_t x = _mi_os_random_weak(0); - for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. + for (size_t i = 0; i < 8; i++, x++) { // key is eight 32-bit words. 
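Patch 258's fix, visible above, simply re-draws whenever the generator happens to return zero, since a zero value doubles as an "uninitialized" marker elsewhere. Below is a tiny standalone sketch of that retry pattern; the xorshift generator is a stand-in chosen for this example, not mimalloc's chacha-based one.

#include <stdint.h>
#include <stdio.h>

// Stand-in PRNG (xorshift64*); mimalloc uses a chacha-based generator instead.
static uint64_t prng_state = 0x9E3779B97F4A7C15ULL;
static uint64_t prng_next(void) {
  uint64_t x = prng_state;
  x ^= x >> 12; x ^= x << 25; x ^= x >> 27;
  prng_state = x;
  return x * 0x2545F4914F6CDD1DULL;
}

// Retry until the value is non-zero, mirroring the loop added in this patch.
static uint64_t random_nonzero(void) {
  uint64_t r;
  do { r = prng_next(); } while (r == 0);
  return r;
}

int main(void) {
  printf("%016llx\n", (unsigned long long)random_nonzero());
  return 0;
}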
x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } From 1aa88e0d9ad631ce7ed737a41aca873a61534939 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:11:38 -0700 Subject: [PATCH 259/352] try to fix pipeline trigger --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a803cd15..c4dc1627 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -7,9 +7,9 @@ trigger: branches: include: - master - - dev - - dev2 - dev3 + - dev2 + - dev tags: include: - v* From afbc581f8dfdc92f69faa2ec57e18128c54fcd44 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:16:10 -0700 Subject: [PATCH 260/352] add Windows x86 to the build pipeline --- azure-pipelines.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c4dc1627..25d4a6e0 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -34,6 +34,14 @@ jobs: BuildType: secure cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON MSBuildConfiguration: Release + Debug x86: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -A Win32 + MSBuildConfiguration: Debug + Release x86: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 + MSBuildConfiguration: Release steps: - task: CMake@1 inputs: From b2dcab58f7d1696795bae0e5bf33ffc229662ee9 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:20:36 -0700 Subject: [PATCH 261/352] fix assertion failure (issue #1031) --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index e2730b7f..75f8dacb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -523,7 +523,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se void _mi_segments_collect(bool force, mi_segments_tld_t* tld) { mi_pages_try_purge(force,tld); #if MI_DEBUG>=2 - if (!_mi_is_main_thread()) { + if (!_mi_is_main_thread() && force) { mi_assert_internal(tld->pages_purge.first == NULL); mi_assert_internal(tld->pages_purge.last == NULL); } From 7eafaa968598fc6b1261103f0f53b0db2bc56139 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:22:02 -0700 Subject: [PATCH 262/352] fix visibility warning (issue #1031) --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index 215eed20..8a48ae5e 100644 --- a/src/init.c +++ b/src/init.c @@ -95,7 +95,7 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- -mi_decl_hidden mi_decl_cache_align const mi_heap_t _mi_heap_empty = { +mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, MI_ATOMIC_VAR_INIT(NULL), 0, // tid From 660d749d77822e54b77acecf82f1aa8f348625ae Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:29:29 -0700 Subject: [PATCH 263/352] do not default to MI_DEBUG=2 in release mode builds even when NDEBUG is not defined by defininig MI_BUILD_RELEASE (issue #1037) --- CMakeLists.txt | 4 +++- include/mimalloc/types.h | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d780fa1..2b1292cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -532,7 +532,9 @@ if(MI_TRACK_ASAN) endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) list(APPEND mi_defines "MI_CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE_LC}") #todo: multi-config project needs $ ? 
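The MI_BUILD_RELEASE define introduced by patch 263 only matters through the default-selection block in `types.h`. A throwaway translation unit like the one below (purely illustrative, not part of the library) makes the effect easy to check: compile it once plain and once with `-DMI_BUILD_RELEASE` or `-DNDEBUG`, and the reported level changes from 2 to 0.

#include <stdio.h>

// Mirror of the default-selection logic: a release-style build (MI_BUILD_RELEASE
// or NDEBUG defined) gets MI_DEBUG 0, everything else gets the checked level 2.
#if !defined(MI_DEBUG)
  #if defined(MI_BUILD_RELEASE) || defined(NDEBUG)
    #define MI_DEBUG 0
  #else
    #define MI_DEBUG 2
  #endif
#endif

int main(void) {
  printf("effective MI_DEBUG = %d\n", MI_DEBUG);
  return 0;
}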
-if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$")) +if(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$") + list(APPEND mi_defines MI_BUILD_RELEASE) +else() set(mi_libname "${mi_libname}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version endif() diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 9f743149..5bcdb07f 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -66,10 +66,10 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_DEBUG 2 // + internal assertion checks // #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) #if !defined(MI_DEBUG) -#if !defined(NDEBUG) || defined(_DEBUG) -#define MI_DEBUG 2 -#else +#if defined(MI_BUILD_RELEASE) || defined(NDEBUG) #define MI_DEBUG 0 +#else +#define MI_DEBUG 2 #endif #endif From 891f9f4cf6afbd213d7260b880795af523646c11 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:38:19 -0700 Subject: [PATCH 264/352] fix conflict marker (issue #1038) --- SECURITY.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 0ad51aa0..b3c89efc 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -28,11 +28,7 @@ Please include the requested information listed below (as much as you can provid This information will help us triage your report more quickly. -<<<<<<< HEAD -If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. -======= If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. ->>>>>>> dev-slice ## Preferred Languages From cf08c27d2b6b82232dcfc0882642b5983efd95f9 Mon Sep 17 00:00:00 2001 From: Jo Bates <29763794+jbatez@users.noreply.github.com> Date: Thu, 20 Mar 2025 11:24:59 -0700 Subject: [PATCH 265/352] support MI_OPT_ARCH when using CMAKE_OSX_ARCHITECTURES with non-Apple Clang --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b1292cc..b7154b20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,7 +434,7 @@ endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") if(MI_OPT_ARCH) - if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) + if(APPLE AND CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a") endif() From 01ee3568c1a2d82779887577e4427b8d65df47ce Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 16:19:54 -0700 Subject: [PATCH 266/352] name anonymous mmap address ranges for debugging on Linux (based on PR #1032 by @zhuker) --- src/prim/unix/prim.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 8e3180e6..994dbb93 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -205,14 +205,24 @@ static int unix_madvise(void* addr, size_t size, int advice) { return (res==0 ? 
0 : errno); } -static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { +static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { + void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); + #if (defined(__linux__) || defined(__ANDROID__)) + if (p!=MAP_FAILED && p!=NULL) { + prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); + } + #endif + return p; +} + +static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { MI_UNUSED(try_alignment); void* p = NULL; #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + p = unix_mmap_prim(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { int err = errno; _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); @@ -223,7 +233,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p } #elif defined(MAP_ALIGN) // Solaris if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + p = unix_mmap_prim((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd); // addr parameter is the required alignment if (p!=MAP_FAILED) return p; // fall back to regular mmap } @@ -233,7 +243,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { - p = mmap(hint, size, protect_flags, flags, fd, 0); + p = unix_mmap_prim(hint, size, protect_flags, flags, fd); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? 
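Patch 266 labels every mapping mimalloc creates, so the regions show up as `[anon:mimalloc]` in `/proc/<pid>/maps`. The standalone program below demonstrates the same `prctl` call outside of mimalloc; it assumes a Linux kernel built with `CONFIG_ANON_VMA_NAME` (5.17 or later), and the region name `demo-region` is arbitrary. On older kernels the call fails harmlessly.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <linux/prctl.h>   // PR_SET_VMA, PR_SET_VMA_ANON_NAME

int main(void) {
  const size_t size = 1 << 20;
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { perror("mmap"); return 1; }
  #if defined(PR_SET_VMA) && defined(PR_SET_VMA_ANON_NAME)
  // Name the mapping; ignore failure on kernels without CONFIG_ANON_VMA_NAME.
  if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "demo-region") != 0) {
    perror("prctl(PR_SET_VMA)");
  }
  #endif
  printf("mapped %zu bytes at %p; check /proc/self/maps for [anon:demo-region]\n", size, p);
  getchar();   // pause so the mapping can be inspected from another terminal
  munmap(p, size);
  return 0;
}

While the program waits on stdin, `grep demo-region /proc/<pid>/maps` from another shell shows the named region, which is the same diagnostic benefit the patch brings to mimalloc's own allocations.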
int err = 0; @@ -248,7 +258,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p } #endif // regular mmap - p = mmap(addr, size, protect_flags, flags, fd, 0); + p = unix_mmap_prim(addr, size, protect_flags, flags, fd); if (p!=MAP_FAILED) return p; // failed to allocate return NULL; @@ -319,7 +329,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) { mi_huge_pages_available = false; // don't try huge 1GiB pages again @@ -327,7 +337,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); } lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); } #endif if (large_only) return p; @@ -340,7 +350,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec // regular allocation if (p == NULL) { *is_large = false; - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, flags, fd); if (p != NULL) { #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead From 6ed451c555da0725bd660440a584188370f46b8b Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 16:48:50 -0700 Subject: [PATCH 267/352] fix linux compile by including linux/prctl.h --- src/prim/unix/prim.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 994dbb93..32004fe4 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -31,11 +31,12 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(__linux__) #include + #include // PR_SET_VMA //#if defined(MI_NO_THP) - #include // THP disable + #include // THP disable //#endif #if defined(__GLIBC__) - #include // linux mmap flags + #include // linux mmap flags #else #include #endif @@ -207,7 +208,7 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); - #if (defined(__linux__) || defined(__ANDROID__)) + #if (defined(__linux__) && defined(PR_SET_VMA)) if (p!=MAP_FAILED && p!=NULL) { prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); } From 02607f2b8d6fa70dfa632d3851930dadeeb5079f Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 17:22:36 -0700 Subject: [PATCH 268/352] reduce test sizes for 32-bit --- test/test-api.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test-api.c b/test/test-api.c index 15484544..6f5d6722 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -203,7 +203,11 @@ int main(void) { CHECK_BODY("malloc-aligned9") { // test large alignments bool ok = true; void* p[8]; - size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 }; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, + #if SIZE_MAX > UINT32_MAX + 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, + #endif + 0 }; for (int i = 0; i < 28 && ok; i++) { int align = (1 << i); for (int j = 0; j < 8 && ok; j++) { From d48bafe2bb63120c1327fe61a13aafd893c97760 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 19:21:41 -0700 Subject: [PATCH 269/352] print statistics nicer --- src/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index 6a480816..70f16ef3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -215,7 +215,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "unit ", "total# "); + _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "block ", "total# "); } #if MI_STAT>1 @@ -284,10 +284,10 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) // and print using that mi_print_header(out,arg); #if MI_STAT>1 - mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "normal",out,arg); + mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "bin",out,arg); #endif #if MI_STAT - mi_stat_print(&stats->malloc_normal, "normal", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); + mi_stat_print(&stats->malloc_normal, "binned", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 
1 : -1), out, arg); mi_stat_count_t total = { 0,0,0 }; mi_stat_count_add_mt(&total, &stats->malloc_normal); @@ -295,7 +295,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print_ex(&total, "total", 1, out, arg, ""); #endif #if MI_STAT>1 - mi_stat_print_ex(&stats->malloc_requested, "malloc req", 1, out, arg, ""); + mi_stat_peak_print(&stats->malloc_requested, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); From a077311a5ec418e2e11c5cb99b82a41c188045b3 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 19:40:44 -0700 Subject: [PATCH 270/352] improve tracking of malloc_requested count --- include/mimalloc/types.h | 5 ++++- src/alloc-aligned.c | 3 ++- src/alloc.c | 17 ++++------------- src/free.c | 14 ++++++-------- src/heap.c | 10 +++++----- src/stats.c | 28 +++++++++++++++++++++++++++- 6 files changed, 48 insertions(+), 29 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 5bcdb07f..ab697f23 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -625,22 +625,25 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line // add to stat keeping track of the peak void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount); // counters can just be increased void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #if (MI_STAT) #define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) #define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) +#define mi_stat_adjust_decrease(stat,amount) _mi_stat_adjust_decrease( &(stat), amount) #define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) #else #define mi_stat_increase(stat,amount) ((void)0) #define mi_stat_decrease(stat,amount) ((void)0) +#define mi_stat_adjust_decrease(stat,amount) ((void)0) #define mi_stat_counter_increase(stat,amount) ((void)0) #endif #define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) - +#define mi_heap_stat_adjust_decrease(heap,stat,amount) mi_stat_adjust_decrease( (heap)->tld->stats.stat, amount) #endif diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index d0e691b3..e28cb0de 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -191,10 +191,11 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; if mi_likely(is_aligned) { + void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen #if MI_STAT>1 + mi_heap_stat_adjust_decrease(heap, malloc_requested, padsize); mi_heap_stat_increase(heap, malloc_requested, size); #endif - void* p = (zero ? 
_mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_track_malloc(p,size,zero); diff --git a/src/alloc.c b/src/alloc.c index 15867315..0c4e4391 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -30,6 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file // Note: in release mode the (inlined) routine is about 7 instructions with a single test. extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { + mi_assert_internal(size >= MI_PADDING_SIZE); mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); // check the free list @@ -88,6 +89,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ #if (MI_STAT>1) const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap, malloc_bins[bin], 1); + mi_heap_stat_increase(heap, malloc_requested, size - MI_PADDING_SIZE); #endif } #endif @@ -146,12 +148,6 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); - #if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); - } - #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); @@ -188,12 +184,6 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_track_malloc(p,size,zero); - #if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); - } - #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); @@ -666,7 +656,8 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo if (p != NULL) { if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); + mi_heap_stat_adjust_decrease(heap, malloc_requested, req_size); + mi_heap_stat_increase(heap, malloc_requested, size); #endif _mi_stat_counter_increase(&heap->tld->stats.malloc_guarded_count, 1); } diff --git a/src/free.c b/src/free.c index a1732e8c..7e529530 100644 --- a/src/free.c +++ b/src/free.c @@ -514,20 +514,18 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) MI_UNUSED(block); -#endif mi_heap_t* const heap = mi_heap_get_default(); const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) - const size_t usize = mi_page_usable_size_of(page, block); - mi_heap_stat_decrease(heap, malloc_requested, usize); -#endif + // #if (MI_STAT>1) + // const size_t usize = mi_page_usable_size_of(page, block); + // mi_heap_stat_decrease(heap, malloc_requested, usize); + // #endif if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_normal, bsize); -#if (MI_STAT > 1) + #if (MI_STAT 
> 1) mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], 1); -#endif + #endif } else { const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc diff --git a/src/heap.c b/src/heap.c index 7c235a7b..0ea9a2ff 100644 --- a/src/heap.c +++ b/src/heap.c @@ -331,17 +331,17 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ if (bsize > MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_huge, bsize); } -#if (MI_STAT) + #if (MI_STAT>0) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_normal, bsize * inuse); -#if (MI_STAT>1) + #if (MI_STAT>1) mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], inuse); -#endif + #endif } - mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... -#endif + // mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... + #endif /// pretend it is all free now mi_assert_internal(mi_page_thread_free(page) == NULL); diff --git a/src/stats.c b/src/stats.c index 70f16ef3..07ce7d16 100644 --- a/src/stats.c +++ b/src/stats.c @@ -62,6 +62,25 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { } +static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + if mi_unlikely(mi_is_in_main(stat)) + { + // adjust atomically + mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_addi64_relaxed(&stat->total,amount); + } + else { + // adjust local + stat->current += amount; + stat->total += amount; + } +} + +void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust(stat, -((int64_t)amount)); +} + // must be thread safe as it is called from stats_merge static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) { @@ -199,6 +218,13 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int _mi_fprintf(out, arg, "\n"); } +static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + _mi_fprintf(out, arg, "%10s:", msg); + _mi_fprintf(out, arg, "%12s", " "); // no peak + mi_print_amount(stat->total, unit, out, arg); + _mi_fprintf(out, arg, "\n"); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); @@ -295,7 +321,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print_ex(&total, "total", 1, out, arg, ""); #endif #if MI_STAT>1 - mi_stat_peak_print(&stats->malloc_requested, "malloc req", 1, out, arg); + mi_stat_total_print(&stats->malloc_requested, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); From 26b792d93b4e8f389a5c724feeabb86038b39e53 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 20:07:16 -0700 Subject: [PATCH 271/352] fix aligned malloc_requested statistic --- src/alloc-aligned.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index e28cb0de..8d2bde74 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -192,10 +192,6 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t if mi_likely(is_aligned) { void* p = (zero ? 
_mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen - #if MI_STAT>1 - mi_heap_stat_adjust_decrease(heap, malloc_requested, padsize); - mi_heap_stat_increase(heap, malloc_requested, size); - #endif mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_track_malloc(p,size,zero); From 34cc5c8fd9e84fbfdfa45ed5db5b09f74a448a3b Mon Sep 17 00:00:00 2001 From: Peiyuan Song Date: Mon, 24 Mar 2025 09:39:42 +0800 Subject: [PATCH 272/352] remove the `lib` prefix when enabling mimalloc-redirect for mingw --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7154b20..283af66d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -594,6 +594,9 @@ if(MI_BUILD_SHARED) # install(FILES "$/${mi_libname}.dll.pdb" DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if(WIN32 AND MI_WIN_REDIRECT) + if(MINGW) + set_property(TARGET mimalloc PROPERTY PREFIX "") + endif() # On windows, link and copy the mimalloc redirection dll too. if(CMAKE_GENERATOR_PLATFORM STREQUAL "arm64ec") set(MIMALLOC_REDIRECT_SUFFIX "-arm64ec") From 797ca19ba93bb92f9c7c97923aa3e43485cbb3de Mon Sep 17 00:00:00 2001 From: Maksim Bondarenkov <119937608+ognevny@users.noreply.github.com> Date: Mon, 24 Mar 2025 08:35:15 +0300 Subject: [PATCH 273/352] cmake: don't change properties of import lib on Windows/MinGW CMake handles import lib for it automatically, and using `.dll.lib` extension is MSVC-specific hack --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7154b20..46435eca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -584,7 +584,7 @@ if(MI_BUILD_SHARED) install(TARGETS mimalloc EXPORT mimalloc ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) - if(WIN32) + if(WIN32 AND NOT MINGW) # On windows, the import library name for the dll would clash with the static mimalloc.lib library # so we postfix the dll import library with `.dll.lib` (and also the .pdb debug file) set_property(TARGET mimalloc PROPERTY ARCHIVE_OUTPUT_NAME "${mi_libname}.dll" ) From 632eab958bb91fb8bd273efe58995023e5087aaa Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 25 Mar 2025 16:02:29 -0700 Subject: [PATCH 274/352] fix for atomic_yield on arm 32-bit, issue #1046 --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 2984f50f..c0425f67 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -380,7 +380,7 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("wfe"); } -#elif (defined(__arm__) && __ARM_ARCH__ >= 7) +#elif (defined(__arm__) && __ARM_ARCH >= 7) static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } From 23fbee7ec69e9bf336a236ad7e85e0bbe41cfb5d Mon Sep 17 00:00:00 2001 From: Vincent Fazio Date: Fri, 28 Mar 2025 07:58:49 -0500 Subject: [PATCH 275/352] atomic: fix mi_atomic_yield for big-endian arm32 Previously, `mi_atomic_yield` would not be defined on ARM32 big-endian architectures if they were pre-ARMv7. Rework the #ifdef guard to be more readable and collapse the ARM guards so both little and big endian are handled via the same mechanism. 
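As background for this change: `yield`, `pause`, and `wfe` are only politeness hints inside a busy-wait loop and do not order memory by themselves. Below is a hedged sketch of how such a hint is typically consumed, assuming a GCC/Clang toolchain; the `cpu_relax` name and the tiny spin lock are inventions of this example, not mimalloc code, and the architecture selection merely echoes the #if structure this patch reworks.

#include <stdatomic.h>
#include <stdio.h>

// Busy-wait "relax" hint for the spinning core; falls back to a compiler barrier.
static inline void cpu_relax(void) {
  #if defined(__x86_64__) || defined(__i386__)
  __asm__ volatile ("pause" ::: "memory");
  #elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
  __asm__ volatile ("yield" ::: "memory");
  #else
  __asm__ volatile ("" ::: "memory");   // no dedicated hint: compiler barrier only
  #endif
}

static atomic_flag lock = ATOMIC_FLAG_INIT;

static void spin_lock(void) {
  while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) {
    cpu_relax();   // tell the core we are just spinning, nothing more
  }
}

static void spin_unlock(void) {
  atomic_flag_clear_explicit(&lock, memory_order_release);
}

int main(void) {
  spin_lock();
  puts("inside the lock");
  spin_unlock();
  return 0;
}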
Now, ARMv7+ will utilize `yield` while older targets will use `nop` regardless of endianness. Signed-off-by: Vincent Fazio --- include/mimalloc/atomic.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index c0425f67..6289dc54 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -370,8 +370,9 @@ static inline void mi_atomic_yield(void) { _mm_pause(); } #elif (defined(__GNUC__) || defined(__clang__)) && \ - (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ - defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) + (defined(__x86_64__) || defined(__i386__) || \ + defined(__aarch64__) || defined(__arm__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); @@ -380,10 +381,16 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("wfe"); } -#elif (defined(__arm__) && __ARM_ARCH >= 7) +#elif defined(__arm__) +#if __ARM_ARCH >= 7 static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } +#else +static inline void mi_atomic_yield(void) { + __asm__ volatile ("nop" ::: "memory"); +} +#endif #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) #ifdef __APPLE__ static inline void mi_atomic_yield(void) { @@ -394,10 +401,6 @@ static inline void mi_atomic_yield(void) { __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } #endif -#elif defined(__armel__) || defined(__ARMEL__) -static inline void mi_atomic_yield(void) { - __asm__ volatile ("nop" ::: "memory"); -} #endif #elif defined(__sun) // Fallback for other archs From 1052c30f034017c67e5eea0ab45e032feb1e4e1e Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 13:09:24 -0700 Subject: [PATCH 276/352] fix parenthesis in #if condition --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 6289dc54..39ff5c90 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -372,7 +372,7 @@ static inline void mi_atomic_yield(void) { #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || \ defined(__aarch64__) || defined(__arm__) || \ - defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) + defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); From b843fead226505f8aaba05724880a3e99cd500c8 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 13:11:37 -0700 Subject: [PATCH 277/352] update mimalloc-redirect to v1.3.3; fix issue #1049 --- bin/mimalloc-redirect-arm64.dll | Bin 61440 -> 61440 bytes bin/mimalloc-redirect-arm64.lib | Bin 2976 -> 2976 bytes bin/mimalloc-redirect-arm64ec.dll | Bin 104960 -> 104960 bytes bin/mimalloc-redirect-arm64ec.lib | Bin 3308 -> 3308 bytes bin/mimalloc-redirect.dll | Bin 59904 -> 60416 bytes bin/mimalloc-redirect.lib | Bin 2874 -> 2874 bytes bin/mimalloc-redirect32.dll | Bin 38912 -> 38912 bytes bin/mimalloc-redirect32.lib | Bin 2928 -> 2928 bytes 8 files changed, 0 
insertions(+), 0 deletions(-) mode change 100644 => 100755 bin/mimalloc-redirect-arm64.dll mode change 100644 => 100755 bin/mimalloc-redirect-arm64.lib mode change 100644 => 100755 bin/mimalloc-redirect-arm64ec.dll mode change 100644 => 100755 bin/mimalloc-redirect-arm64ec.lib mode change 100644 => 100755 bin/mimalloc-redirect.dll mode change 100644 => 100755 bin/mimalloc-redirect.lib mode change 100644 => 100755 bin/mimalloc-redirect32.dll mode change 100644 => 100755 bin/mimalloc-redirect32.lib diff --git a/bin/mimalloc-redirect-arm64.dll b/bin/mimalloc-redirect-arm64.dll old mode 100644 new mode 100755 index e636028591dd8bcf7614a6cd3202121c1913df81..27172d2c0d5f24e2b1a4cccc931bbf530c20860c GIT binary patch delta 7098 zcmbVRe^gslwmv5WO>C*qmXelM5`hwEX+vuzXlujtS84@QT3be~4FxoG3Z+ODtd*ck zAFe)UxE{x{{IN{Y)8m;i0S%%e_%F;FSKxZ7@JasNs-%EY34%N{i9g+8)dr!1w zmj67~y7xQ#JA3c5_c{BVdvkHiac()Tbv>1(4V&y=UBCa+H)`_Uh+Sv@YB}?oB3s@# z4J^K}?#!zS=AAjC;P2m1ajhbkRF%89tzdAVBX*&~*L@RDPqX_I zU!k9cu1R`RJ0AmH5&bCEfxVyHpX`00AQGp`!So-IHDovZjq0N8`9<4klzqJDPU>Z= zQc`q@>>oEQ=OrNDX^TF@veVMnWyX=`B5}lz`5QGywNW(!wIB!zBC^-^eS~hz z7PtJCHK!ToVN_m8W?n(araNu9QBW>If;zfFLMl8y{AyeDlUr@k2_&W1HM0YmJLp8HKXVV&RB70}oF&Ydb(D^==d+&BoK0Y< z<^sBt-DOTIphow5atD>3{QJyFzL6v4q*0d$e1J4Yz(>+Zl*c2VC6Yr+o}-0aKVE8N zLuT__G$zt)QG)z)*+p|hB8bQeZb2ovoJyP7(d?V%oK2AUyz+C01+#C`lo9rE`eHVd zoj@OE2|2eWo4=bm$qR=MW;XHhqlrOve#P zP9aexnDazB_l*dCEop2qBKUj+4?eO0p`;-Aaw176&hux9^a^>nU*-c z65;f*A=P&(?6OEyPPIkfoEEXc7wk$sR z(8`Rt2%n!LgxZFhxWZg4vNDNTwrDle=cdwHPP2mC)WrwkU$>s@LLoFpaNy{U5Q!V! zr)V_ireUGHx!I|2E49hzaQfJ)BIdcpN^H5DDia=3&w5u|?=$ z7X-~To^R3beer$s$u1&IY$DPcz67z(*oKQ>Y4`^7U@P&>syl9b{{lF^*CVOyi&ZJ} zu>h)M8zShPKK2`ymXOR=uD+N4iaovhChhP7NyCO$ub>_5;_6%Jb1d7^qAi;*{g%CM zv1?5Wq*ocu+e?etPjO$vPUmG&Gy9J`hRUj5ldie^5BBVuRaD1@*PK}Y*j(lO%86i- zBB?k$aFX7R*z96|lI9fYiECEbK7$KI%gU8rqs9@Uz(QV09pO*RFehZTgj3X}m(`h_%^;t;e_EUQ_}W4`D<(@+op+UHF1V z{wCJsdJ?@b^U8a?V$!Jof65W&h$6#=SJ4Jk6#PYY*&6vn0Z+Ung@f-2#3L`j;=PM) z#}7)(s#09+wSvu>Z@vs&F8E12UCTCXY>6LEkv^Ve?`*8od^H)m&Gu>>y^USkw3;4Z zrp+hlUKZGF)DAC``a)xyJrqx=x~;3V!#7CztZQqA#^hq}Zv8#j7q;CR-<&3Weu@2g z+by(ZeRx)go zUSdhxcOb%j+m|k+H%dpom^sOx3Es;yC$X(dUI{(3eZK~~^5PD&)@zX7VT#PVyMg#s4bUnW7|vaf!I}I)D? 
z758__% zZADAmX7Im7B6 z>nqFa_f*x@A1ymnR9Stv4pVezd`bPGokuI{8n@IuR8@Dlrn<7Geot9lRoQ_N)k2o6^9P3!X;9Ie?(JU zzo)Y9!K%YG#Yd|Tpv7ux?hcLCjB4p5TYPjQexiAstdvO)ZKQH&?4gG#)rC$!nopU# z(T3-Z^thGk{z}|@`dgUkXg&?9-L;SEs7E1M6ha>qS|bWMu9D+|oK#42sQJl!s>flW z_%pHzcRnR5X%(bHAss5|5~Q1xq1qNJ)wc<1P|@?LdZU7zRLF6a)Ll}CF|*;P@{v{E z&oQ=5kWPiPs^p`BL?%xkruyfE=uyOFK_Lzb91*BYbLgX&?xCg+1r-rEd8G>UA{b{c zHIXmXh;#xC0>y$6S7_oTs96MU6X=+wW>?hWD>%(AvQ+9;2z{&2^MsE45@`$yaYPYA zrEM)VwD6pZE;D|mhG7wC6L>{JrGjz^ti4j@hlJdMZWZW@$rb7dlWIa?L5~WI2pkk@ zhS1pSrPTO0)hJP5Os*ALql&XUtLU~1j7{FuLDf7aJ7exb!+)|N#}k>O$OdXY!-obfw6GIq2_+9y-Uyy z0^PIJL?yKf+9xnPOHEe2$%38~sQX3@Lg!MkO;OVu1#Jyo>N`wL4k5Y(x&_8UB_Vna zq51?pA}}&b&8Vockyj1Wjs@ON7hC?W4r>z_bLdgJ+2HF2T8k!XH`X(%^qAU&fluI| zK+6NF!@*GDzagSgLCXTCVtBu*=2q1br&Q4>&?3+&yhRkX#r7C2FcuRI#okgXS*|rT zh@Mt~!I*`ZUb{L`qk@hIoD>*y7&{%JYFGw=7J*kUmdP$?r$D#BHZf?eGH9|#&|!hf zV4-Jv?^Pm~W9(%y6#^5hRJl!!+%9OBz*d0{p*8}wSLWqST}zeP*6CD?C01LldW*?M z(Qj5amqm!SS!ThStIZsu->RB9g;+bwEOC~ZTl8C0vsNMcW|@^9zIwE<=(j0mxuZgq zXPM2;ZHo7Y@=ola**d*C4F-X+^k?T~4w*i@lV)3mnthh~?0ia9b(c_Un58~DkJeDq zUq-cByg<5H{HOnt#NP#jcttFTm*XNP{2F2}e|oa153CQbkk!-~XUHdHJ3jb2fgbQO z@Yht1^Ar%WkADchg^)_(q%Pn^;?$JhN=Olvsc$_YJ?M9AAmnSH{Wd~gMt=nOH00Wi zgxm%HBQ`=dz+P8K$TGZ-nF~o1cc^Ivu|+FuWPFbPCFF*!gmghR?jQuOO0*4l6o!d+ z;0p#ECKeI$B;+<=HZTk{Y2-NmuA2$j4CDc~0M`M-z~eyMP9z%Vj0^52l%u;@L0e^lX}v}qCz?!bT_9DHCqFdJA4V6#cQESGcE|6Je<(rXh8k_6Q_NsBmvc~5_)*0> zuscZ?5c`NULSeL)kYr3@1o$*E#AEyn4bA)fGHnX?jPww{wb$0$&>QRHakXG>>m zr>AqOi=5J(GMwVBTe`Vh(&O~I{0;u`Ub%O&*AXZUI0LRgZD6E-w12!`?vM1tsZ2;I zz~-@g98I25kJB^g4SPqumJVx&t;62o=#?qK&| zcVdsB$K2E6Z}qqNJ$|2`^y&H%`wV@?KKvFT7B~+3NBradh=0;=?X&ec`kZ~PzF=T5 zFcKIK$N}?!WxzJz7%0W!Qj%eYfyd+X40=X9hIV7SrQO!v+R@hG>lp0NbtQHgyDVKO HmvQ=kBa@Zy delta 6972 zcmb_he{@q-p8wvLWK7gzYQcsU=!-3FTKSRkLkft#%CCTep@4M!MM|l(43vn~Q{-S1 zJUVeWshBe?(RwT;C6R)HWi%-Ge=Ez)rK8pzxTZx zh3xUa?Kv->`~BSS&-?4$q@I+{lhXNbArtbNwT`pbz4VdXKJ{wu`S`QUE6=KO=GDJL zY&-wuE6=Go^@?A`8Lw*ig(^3+HMgp^NsG>Xdrbc8F;mhD@IC+x(uYXVY^NTZxxY98 zn*iYO0L-D2!U8=u9J004w5X${vjervbc$4wKhfDj6%lDYema#l;Utmm#%J~S-+p#o zwi-O|n2woVYz4>|y_K?wm`)hM(_ka8GQv32DI+lDBCx~8bg$H>FX?X-yp_tE`nv^k zmX=&qz7-8>P^sqF)i1W8gKBhIO|Cv+B#D)mJ2e_W?#F)HS$+0{OP7js!SWKKhpwJQ z16Lhk&W*3(=X=I-{Jdb?H4g)wzfQ^;iRo>0HvWGMO_enm8@o@iIMt~B7gWbw-mXUV zyd3Eprb;Y?{@GOX04v!lO|1pkVF8%=J(;OPmDYWKRo`Q$9J=}|1Ci;2=9kD>95Ctmt~*Cd7$skip{d_Tbb4wYjF47wFd<7lM} zIAKrHEybb<1rdd#1$65*PWo1HF=HfTcB_nXv*q^!k(F;sdWqnzQhMj~6H-LC%o`NE zHS~YxRk9_Jx=YG#pw(sN%NOh5xwH3yxv5Gv0{978DyOcIvXmaX@d?sNN%^nz-kI{J z^h|j<8K)nV-(o#IQ%;vd7We2(8=I|3O`a^5vP=dmT~+ZOsiB4QD;9cZ$?3(H>5gjh zM@3Th++r#F;c6*6iAAY)gzlMt2We7b^Y13)UTUcFlWp{cswV{93|eYiNp{ekwzBms zOsbd21=qoIMD)CqI!EN=Utc=O_H()!ju_2|``}0#`E(g%Cs8-`Cdh1>Nj=jy$dA$? zo9$X`OqNMmz+uqQNn58GiS$iqQH~bPL@uOl)yuBZ&5+sf(n$3((o8?7t|DF3P_vp; z()yYreJz3K=(US1)LpZT5E`!e*|k_p+&CBg;)K$=o2t9kn#M04h5m~=kWb%L|D3!7ok4pWQ)|%96@YIBhM@O`r*dUd%M;q7*%qxZC61#Zm zB-=*(fojJ@pZB^*A z3Votq7fuk%kL~g%1MVq2ql_}hOt~4A7nA#`bzw2iHyY9{3(LqlwdGwnt9VRZ-WQMl zR?T+u2~c;^oAmU;UBpgJi>mOmcF{_@e{(@s)IFvaFE!58IMWto=1*t*fdTM>r1e{V(eQ3*Q@==tNo?Y89@Eq&eH~yrAHrF zyodaOjxJuNuP>6#G`sjFa+=P!uO>aT$?nsS7Rv9^PwWo8_j)-&SJv(#_fZ)?C3?QL zirh>Kmrznj!%NBqHvD!8 zDv71FMDNwfL-fmKcaY!F`sGE!pRb{JFMo`zQpT4T>gi*vZWSuOP=2%Ob^|W)i?{g< z^~LgxDf$!Xuuwjwd?UTABW`+n-D2`fI=Suy`7u4XUewpmmETw7^#Ov%{F04}_4V`Q zeY9m`h2Y&sPi*{GWbbjTHjpy89w#D7cm|? 
zR+ayX)^74ypY?o8D@XFVKI#^F^WP0)2GL27z3lzuU4EkK2N+9eOfb{uzC8 z>ki@LP4qvvmIxo4l&`nGAn1eT^7~ZYR!V|&Xq%`f74qwJeA^n`b@Jo1u)%KdR>-rb z=!S;Vi&I659q-irN!!Cj+V+#;S4;@t|mBgHg6SuX~)ln(K5Pv=PErB=TG*WpzC+9r}s4fi9~7r-iGp|u|Tty(E1!& ze~6Z<2P{MNP(WYVyLSF4zUmBcI*99}`EzYlH{t1P8`Z0qCI>r-N9nwlUDVsMqQZ=# z`35{Jg|he5cFAS!d0HDUn3*o|3AUvHmT}y`(Z%sGj?Zumar_64nzit2pq%D4&SE{nK~acl_QLgcnLGY&T)_>Ul` zxxWGYCIWsB5_=wgSW_V2q-*vycg+Wvc~VNu|LEMga}R3mI@VUT_{SyuW6c^2j{^e+ z{!C+8*CHq4GkqhHq>I6g8y%!2?r(8IyDgG~o$S?jUuSy{Qi z_n*L)N4A#yzs>EF`i71Jo7&qt>f0Y~+um`wtsUPlF zJDi6Pb+j~h+}(D#QX*!e@9=C{1iNF{L&0@{Vp|hH(^Vee0nJ% z({%LV+YI8j1g(2SGB~~yYc#G#Smlom*0q!K?~M@;u|s#O_(`VcX7aM7cRRzDZJIYu~Y;|<_ewmq|p zSYPM#7{_FunoUznaXQU$I!`U4shL05vJ^SmIp%`;G_`e{c5-y(sU=i38hF;C1fF*h zQ#bFADDv}IX@ZMM6u%Y<#V@p=xgfl@u@S5Kh#k7X$wi^q}g>`&ylAe&uLKCuPnr@5M?)^ z6TdbWc5_AdgIa)T?84IHoKA3@;8@4i5}I0u(=e@Bn>hx!noW7=)m>z^jnfjxTorD$ z8>_H3@*X!wKgR$cYM(Z0aZZnMOmfUQ2^fX6vR!i^liNhjooW>iB`XX>~oZvy@ z+8lT|9pIP?X2I#-jjM#ysa*F3&6~4T!wD^Q8>bx{-5mW~&8S%4dTB=O#8tTWFhZJR zE?E44<}D|KQPTrus%ImqHFHtS(OfTc@Jcf~?>B2^5*M9$X3jh_7ww<6JS3XOLgj1nb8a@ExQ&%~hxKO!6x-D)sN*MXD_(Z9HrobK{g> zNu8$d;A)L|>iHG8mD>MG>S;z0^?2v>Q^Q9;E@aoo_#A+hhLkRC1qjezd~~wwb;jbQ z^B{5Q#1#PBAVu7WapaA40RKc>`Xv5>bU*U;RRAsEBHf5jqAack*g{bM7Jz<~Eo%V2 zL^R(D@C$GWam1%luDcEG(0@PzScCQ{#JTJWR}Wpxq0m|nuwI`MM%Dv_5ncF&7cZV< z0`UBqRVTyQy!b(g44_vh$k4ms2pWys7=WY1yhw*r3p&hObMCKn)M701sq}Un| zEUl{CCE$6Z5_q6s1u0fKP7P;CS5mLc1aC+3U!V}aN-t2~Kc&=4SC z3|Ip0U}w-1^acIFbWf(o6>1H+L!Qt?I2BHZr^C#7Yaer*ibzpMv@tp|kQhh~*oW$d zq#?(Ub0`vv$3|j_*jNlwnqM(s3)s5?bpa{R9Sj8HL36LASM0U*+IxK=f2cbY3Bf7j zDf21IDe+WipQq2)=kM$8%S5Il#{Sl*JK7obM19fJKzblEFg*Z+kO8o>e4~-*NOUZk zj9Lc8LHnRI=os`5bq_^`MurkY#$ofYIBXxT!x9Np*wDZe@CCX9k-&5iddxjykGr?C U*Vo(Kn+|0{5H^SL&No5+7m$5by#N3J diff --git a/bin/mimalloc-redirect-arm64.lib b/bin/mimalloc-redirect-arm64.lib old mode 100644 new mode 100755 index 11d71ef9ea2aace6dce34616131ab0e5654f45d4..dca80b9b855c57735600d925e005a34cc88b21b9 GIT binary patch delta 97 zcmZ1=zCe6~84JsV@&~6TTd^Eq{5jc{U1PE`s}qow$1c6OkyW0NGssTz&PfIa2ByjO nEYg#YuvLJS8Gw~kuw2+4^=a}ec8Fe;$>%vZpn5-ZtYrcKos1)) delta 97 zcmZ1=zCe6~84C;No=-<6Td^Eq{4v>v8 nvq(=q!d3xRW&l=F!6Ki$_{ro~>=3;wlh1Q-K=pp+Sjz+exUD1A diff --git a/bin/mimalloc-redirect-arm64ec.dll b/bin/mimalloc-redirect-arm64ec.dll old mode 100644 new mode 100755 index f5ee4e4765225f8725fb77330abdecc19bcbe172..a228af39d5a094c6c4c7d257fb6184c7d66b72ca GIT binary patch delta 10127 zcmb7}eOy%4y2sbrjLHz`n7j;vGAIhD>5v*wz6_~}SvtNY-PAK4U&4H8OjAsap{$GP zJ!ht+!70lKIe~n`F!BJ zpJzSmS{Bm&tJ2mnENEXe#|onS@~=< z+4$GLJ&Y#Sk2(_Evn8Ux+0H7F9ju8yL{~GdG?(VF2c=9JU}ehRdTA@&!P2At;xI-Z z)*P8GE#J!edxx=(!c=K_3H$D$$!z?r0?m~z@Gj5IUt5m+$r212nXYotb-3dhk)U!Tbt6#^UC+NZJkT$7@1{fXtT&X5W++3!~0 z9^aPZ$fBeRzK;;{sI(fUrm{JDg5zZJ*MTl48WHe_R(mKtp5 zLei|pNflc8sW~nLT?dAhlcE<1>B&IgR!!WS+Th2Ofz+1G0sh4M>m$}px|@s z98Mjf71VK#O@3@b)LU^5W_o0TwEO|~>SN33ewMK5F8T$Vw`q{{3`L#>Gux&K)X$D? 
[GIT binary patch data elided: base85 deltas for the prebuilt redirect binaries]
diff --git a/bin/mimalloc-redirect-arm64ec.lib b/bin/mimalloc-redirect-arm64ec.lib
old mode 100644
new mode 100755
index b88e8fc11d671ae0aac56ea260ca608dd311cbcf..0ce7743647b158d25a8dd82854e6fc34a0b6b28c
GIT binary patch (delta 98, data elided)
diff --git a/bin/mimalloc-redirect.lib b/bin/mimalloc-redirect.lib
old mode 100644
new mode 100755
index 1d710c011bf7273a3d693106fffe03f4c5b5a1ab..785fa4751353bc3375ee9b77b6942505053f63c1
GIT binary patch (delta 88, data elided)
diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll
old mode 100644
new mode 100755
index 32799ffeb37474d9b899f1c34482a6034580014a..92578f240e430f5baffc000ae3c366db690fb75a
GIT binary patch (delta 5587, data elided)
zNO7_Mq2lX&5h9B|=!!ORKV65A2Z6!hsp!|~w3GYnW;zXfrF?DwfFt=5^d8zj;QBnH z9k4A7I-)Q!*~C4N*;9{zN9Ina$lR;lnaLBj6wE_|j#Q(E1Rwk{(KkU#$u~JDVxbWl zY%@e2uqArC5~#DJXw)UryHXDq^mL`jjN|0(bSfkJ2W+wZeuoH>`OxnY4!Gw0w$z(3 zpt-9gGVp)Ncpq@Z2W@w8&Ot}4ky9+84M)6^g6lm%qOb2pMToPy_Zo@7n~m!1-7h-dz+){0L{3^Z(H!&s~fq{ZH9Aj>iKPR zMWaGzBlm1Z0V|gByBv_1#L4yDW*jRTb$}*D!yUd&F$=H9eE!Y*(&@A^l36GEDhmf; z4GuBfC)If4DqV#drhxC>t`wpdK-HlF*ik3fg?+2mHV7wdN$4Du?cWL80surdg|sql z`)$?a1rxvv1{?2pqy_}^b-M#rr{}|Tn)HKk0Gcj|U#t~lo77^g{UW;|31~;5(Umy9 z0#1G>6e4W&zB2rQN|XpToYUdOL>Ta16u&y>4|Iv^MA%~uTkZ@ zpf)FG!go}Ehp~tERXcfjZOtks8n)HElEW)_ALrg<$@dWV_3zyb0FoCnYj}z+~ zc+x*%%Z8OpW(W;y$xYz>8}!Vtad5+C-rb+zxec{E>n&Wg@&1~BnWr5%V1r(ZLv3>q zG;=L@#3v!>)>)z#<2{;6JiPH`xIbH2X}*)Fw)`3U*%izbeug~?e5t%9zmxF(_wg1v z%e;rZ@|smKV4wk-xahkyP`@qhi4zVwhwJ+jeo4N&BvWxQG4m`C56|Elg=zgjf^JvQ zl?dF0wd82@2s%Jp{1){Do%`%9Y5^za&%*)5F-E$ZP9cZ;N?14lW)K{U#Xn=;_rdl@CM$gR+de}O=YwRPUMG}n;YSP01pF& z)8U`1zstzakoYgp(D=})aQgbiJxyEzJgea zo1`)8A!ENw)WikfpxHpl)t;R|dfY@qdLb_}(a9Z2QWNkn|2Xbf@Vvjrj~MGJXQ>68Ct~OU5?lz~Er2(7R)gO$KCnEN^#E&_dDR4s z>C%`6Z#OxX&wZ2kISqc!v|~B z3P=>>zd`aIg8u>_29U)M{8czW#_-D#iJ^scEg;*lW$WjKRdqxG9})SpuxZ<}QaH8I zhJ&pw-j-@LQDpvH9NczNf15@{G$j0WcxJ6NghTBJ4Q~aGH~AllBpQlecaA(SzPiM;|`;ZSV1eM-QM!P($nSZTr0SZEg0JqpfZG@XPi}o|TVf zJIZm=em8#Cz7jueUx8O{A9?y)yaI(xx^>@PsazQ@Xb3ZZhxm7now>K%ykbP*pi2!i?)@x#%JiiSSddN`7cN62Tv=bGZ_rWR#j-q;x>pDV12H zb^_H(D8@-0wE^y=n5DQM@Og^cDINhFp*TeGG~hVJ0gCy*Kq3k5*n0A z#@5l?Iv6?Ws1-(DI`Y9NKt{#qVKf3V2J-ierT|4jVp*1ERGc7a7^(yb-3vnF%K(`` zoFLP{P7_w-0ptbg0|@|&L++7^^8kiHNT7%<`vg3LnZp+!!BC ze}~=xVL|G4z>~1Kc0g`i+&)^|0iyuOFbIjzPG?O4;z0P9GOUlVh3f#RvJApK-WLk% z0qx8(b(HY{@@5$+W%>XGvW%NB#X&$LAlE>G4v2s^ow!sy185Fp0mKJv4EG$oR4f{S z{6S>KyQH%NA7KzrxS3DwnQ_GJXtMk&uBXUb5z?0qd-6<6~!@EfvE}A_XBCEyiT2rViMKtY$G@Gu5;K+mY2Q#tIM( zTsYUl4L^F72LoyNj>nJl7&vjxwsikcj|@IjiMh@^ZTZNOvOH%+EJ8Af8P+_gJFfdk z=h9E`&5spx>dufN!7b5i@HVaS99vG)LS*XHFs;bbZej1j%xFCmAb<^pRPkc zq>ox}SpQ;0PK3(A@fx<4-N>q0BfE*+#yVJXLDd6q*Ei@rdcHwms5V3l*&U-EzjV&MLFot#ek+3X^?s`N4zF2HEb5Y=mu=yXBqo za)m^pQ_Lu$ilic-JgW>T8&pn}KrK{{t0&b{>KV09)2Hdz1T-dXowi=vpk2_Wv`8n= zUDl21uIa{g?Ru|XW?&7RA#S*695%X5ttOAD-6Szf&DCa^IclCY$INqP&V1H#&T`%o zv|Q}AIITOayR6MtH_0r3&=@>~tcWdVrEE1j$xg8|>?|8&`{e!dfc%{Nyu40PuShA7 zlCKmh$CTHU5#^-PtLjj7s`^x{TBX*hP3k&zT%Ax4YeqC7&6uWD>(RDpy;`ZRS|`)Z z>S8)hcTRs^e^KAP)3D3nHi(R3W4Tdclp3du)5aNN)HrMGH=QvBOlM8!O!a2F*=a@= zzC~aWT11v>mWXBCGG*zoc3OSbK5M@f1rcclq+)ffiLGPn*#w(pQ|yR5B)=>llV6j2 z6m5!jg;&v`2w2WS?H4V>mIkYH3)JqmwpvjLArm+dutHYMN}%>}sC}A^LhU|zpZtvc atlXriQ`i+w#R61MYQGHCliJ}VcK;t|AO9-= diff --git a/bin/mimalloc-redirect32.lib b/bin/mimalloc-redirect32.lib old mode 100644 new mode 100755 index e29272506cf314ede4bff04eb7a1d7949739c43d..bf64978793de2eb34b5ec714fcffa813c7188d88 GIT binary patch delta 106 zcmew$_Caie84Js^=+G;ZtyuOj{+z7It}$7j)sFG!<~UX(MozuEdu|?OU|?XHY|kP+ wc?(+!Scw6UjANCaT*@BHx!9)l;X|kjv&n)SjbIfrlh<=_KvZqM%rTb<0OMsOVgLXD delta 106 zcmew$_Caie84F9)#;yyKtyuOj{+O)Et}$7j)sFGU<~UX(Mow+}6ZelYFfcGpwr7!^ wyoId Date: Fri, 28 Mar 2025 13:28:10 -0700 Subject: [PATCH 278/352] update readme --- readme.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/readme.md b/readme.md index 81f2057e..0727fba9 100644 --- a/readme.md +++ b/readme.md @@ -84,6 +84,9 @@ Enjoy! ### Releases +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3-beta`: Various small bug and build fixes, including: + fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, + fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. 
Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`. Add `mi_thread_set_in_threadpool` and `mi_heap_set_numa_affinity` (v3 only). Add vcpkg portfile. From 8a81fc73c88750085ea40ae4dfb49955bdf93c71 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 14:18:28 -0700 Subject: [PATCH 279/352] update readme --- readme.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readme.md b/readme.md index 0727fba9..87cee98d 100644 --- a/readme.md +++ b/readme.md @@ -12,9 +12,9 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release : `v3.0.2` (beta) (2025-03-06). -Latest v2 release: `v2.2.2` (2025-03-06). -Latest v1 release: `v1.9.2` (2024-03-06). +Latest release : `v3.0.3` (beta) (2025-03-28). +Latest v2 release: `v2.2.3` (2025-03-28). +Latest v1 release: `v1.9.3` (2024-03-28). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: From a78374d816a84b5c72db3842eaada88785cc027e Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 14:22:02 -0700 Subject: [PATCH 280/352] bump version to 1.9.4 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index aeea621f..0446485b 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 9) -set(mi_version_patch 3) +set(mi_version_patch 4) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 8ccfcec3..97cf7856 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 193 // major + 2 digits minor +#define MI_MALLOC_VERSION 194 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From f2539bbe343c7f5c2c862d2d0cd9f1094ef4289e Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 15:37:11 -0700 Subject: [PATCH 281/352] update readme --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 87cee98d..5a495275 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3-beta`: Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. 
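For reference, a minimal compile-time/run-time check against the version bump in patch 280 above. This is only an illustrative sketch: it assumes mimalloc's headers and library are installed, and uses the public `mi_version()` call, which reports the linked library's version in the same `MI_MALLOC_VERSION` encoding (194 for v1.9.4).

#include <stdio.h>
#include <mimalloc.h>

#if MI_MALLOC_VERSION < 194
#error "this example expects the headers of mimalloc v1.9.4 or later"
#endif

int main(void) {
  // compare the header version against the library actually linked at run time
  printf("compiled against mimalloc %d, linked against %d\n", MI_MALLOC_VERSION, mi_version());
  return 0;
}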
From cbab63f6c9935080052dd2648d4480013d6ec2c7 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 30 Mar 2025 16:15:27 -0700 Subject: [PATCH 282/352] fix release build warning (unused mi_stat_total_print) --- src/stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/stats.c b/src/stats.c index 07ce7d16..92bc049c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -218,12 +218,14 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int _mi_fprintf(out, arg, "\n"); } +#if MI_STAT>1 static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { _mi_fprintf(out, arg, "%10s:", msg); _mi_fprintf(out, arg, "%12s", " "); // no peak mi_print_amount(stat->total, unit, out, arg); _mi_fprintf(out, arg, "\n"); } +#endif static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); From 07743454e5a04356144918e32b1e2ce8e80c1726 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 10:57:16 -0700 Subject: [PATCH 283/352] fix dynamic override test on non-windows platforms --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cc7ec5c..879aa668 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -729,8 +729,8 @@ if (MI_BUILD_TESTS) endif() target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) - target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version if(WIN32) + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 $) else() if(APPLE) From a9e94674299479588b742cefa3ebe36bb72cc83b Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 11:00:05 -0700 Subject: [PATCH 284/352] make dynamic override test verbose --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 879aa668..66b24d89 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -731,14 +731,14 @@ if (MI_BUILD_TESTS) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) if(WIN32) target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version - add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 $) + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $) else() if(APPLE) set(LD_PRELOAD "DYLD_INSERT_LIBRARIES") else() set(LD_PRELOAD "LD_PRELOAD") endif() - add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 ${LD_PRELOAD}=$ $) + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 ${LD_PRELOAD}=$ $) endif() endif() endif() From e1110cdb9f64ec319f91fb5b5607bffd3ed76559 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 11:02:10 -0700 Subject: [PATCH 285/352] nicer cmake logic for windows override test --- CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 66b24d89..591ba130 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -724,13 +724,11 @@ if (MI_BUILD_TESTS) if(MI_BUILD_SHARED AND NOT (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) add_executable(mimalloc-test-stress-dynamic test/test-stress.c) 
target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE ${mi_defines} "USE_STD_MALLOC=1") - if(WIN32) - target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") - endif() target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) if(WIN32) - target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version + target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") # link mi_version + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # link mi_version add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $) else() if(APPLE) From 77b622511ad86ff7ba01154dfca86dafe9032b86 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 14:44:46 -0700 Subject: [PATCH 286/352] fix alpine compilation with prctl.h (issue #1059) --- src/prim/unix/prim.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 32004fe4..09aa91b5 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -31,10 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__linux__) #include - #include // PR_SET_VMA - //#if defined(MI_NO_THP) - #include // THP disable - //#endif + #include // THP disable, PR_SET_VMA #if defined(__GLIBC__) #include // linux mmap flags #else @@ -208,7 +205,7 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); - #if (defined(__linux__) && defined(PR_SET_VMA)) + #if defined(__linux__) && defined(PR_SET_VMA) if (p!=MAP_FAILED && p!=NULL) { prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); } From fe47ec625db55a51eb5cc66655a8dfb06e03b6a6 Mon Sep 17 00:00:00 2001 From: Eduard Voronkin Date: Mon, 31 Mar 2025 15:51:26 -0700 Subject: [PATCH 287/352] fix recursion in TLS init on Android --- src/options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 772dfe66..610fe5ba 100644 --- a/src/options.c +++ b/src/options.c @@ -425,14 +425,14 @@ static mi_decl_noinline void mi_recurse_exit_prim(void) { } static bool mi_recurse_enter(void) { - #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return false; #endif return mi_recurse_enter_prim(); } static void mi_recurse_exit(void) { - #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return; #endif mi_recurse_exit_prim(); From 235a0390eec64c54f97e6bcbfc0e24307031812a Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:34:00 -0700 Subject: [PATCH 288/352] refactor numa_node_count --- include/mimalloc/internal.h | 22 +++------------------ src/os.c | 39 ++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5b3e7e23..51fad09c 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -140,9 +140,11 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t void* _mi_os_get_aligned_hint(size_t 
try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); - void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); +int _mi_os_numa_node_count(void); +int _mi_os_numa_node(void); + // arena.c mi_arena_id_t _mi_arena_id_none(void); void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid); @@ -813,24 +815,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) { return x; } -// ------------------------------------------------------------------- -// Optimize numa node access for the common case (= one node) -// ------------------------------------------------------------------- - -int _mi_os_numa_node_get(void); -size_t _mi_os_numa_node_count_get(void); - -extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count; -static inline int _mi_os_numa_node(void) { - if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } - else return _mi_os_numa_node_get(); -} -static inline size_t _mi_os_numa_node_count(void) { - const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); - if mi_likely(count > 0) { return count; } - else return _mi_os_numa_node_count_get(); -} - // ----------------------------------------------------------------------- diff --git a/src/os.c b/src/os.c index 12cc5da3..894e3a45 100644 --- a/src/os.c +++ b/src/os.c @@ -696,34 +696,47 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) { } } + /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count +static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count -size_t _mi_os_numa_node_count_get(void) { - size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); - if (count <= 0) { +int _mi_os_numa_node_count(void) { + size_t count = mi_atomic_load_acquire(&mi_numa_node_count); + if mi_unlikely(count == 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? - if (ncount > 0) { + if (ncount > 0 && ncount < INT_MAX) { count = (size_t)ncount; } else { - count = _mi_prim_numa_node_count(); // or detect dynamically - if (count == 0) count = 1; + const size_t n = _mi_prim_numa_node_count(); // or detect dynamically + if (n == 0 || n > INT_MAX) { count = 1; } + else { count = n; } } - mi_atomic_store_release(&_mi_numa_node_count, count); // save it + mi_atomic_store_release(&mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } - return count; + mi_assert_internal(count > 0 && count <= INT_MAX); + return (int)count; } -int _mi_os_numa_node_get(void) { - size_t numa_count = _mi_os_numa_node_count(); +static int mi_os_numa_node_get(void) { + int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = _mi_prim_numa_node(); + const size_t n = _mi_prim_numa_node(); + int numa_node = (n < INT_MAX ? 
(int)n : 0); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - return (int)numa_node; + return numa_node; +} + +int _mi_os_numa_node(void) { + if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { + return 0; + } + else { + return mi_os_numa_node_get(); + } } From 3c3600f85fe4a6d7f01a9e69db3cada9e63627cb Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:36:01 -0700 Subject: [PATCH 289/352] add atomic_cas_ptr_strong_acq_rel --- include/mimalloc/atomic.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 39ff5c90..e8bac316 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -111,6 +111,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) @@ -120,6 +121,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) @@ -303,6 +305,7 @@ static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) From 5a58df6534cbc8673a655e5772461ef7fd4bcbcb Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:40:30 -0700 Subject: [PATCH 290/352] fix signed compare warning --- src/arena.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index a7c20764..bdae8da1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1007,17 +1007,17 @@ int 
mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (pages == 0) return 0; // pages per numa node - size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); - if (numa_count <= 0) numa_count = 1; + int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count()); + if (numa_count == 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes - for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); + if ((size_t)numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; From d767dbfbb45e2e38502b03dbb57698845899d34f Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:50:36 -0700 Subject: [PATCH 291/352] use C++ compilation with clang-cl (as well as msvc) on Windows --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 591ba130..a3acf83e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,7 +173,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall) endif() -if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") +# force C++ compilation with msvc or clang-cl to use modern C++ atomics +if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL) set(MI_USE_CXX "ON") endif() From 8c99ac1bbd9c692239bbf70c40f9be578d54d394 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 11:16:33 -0700 Subject: [PATCH 292/352] fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 5a495275..70a25fc1 100644 --- a/readme.md +++ b/readme.md @@ -177,7 +177,7 @@ mimalloc is used in various large scale low-latency services and programs, for e Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the -`mimalloc-override-dll` project builds DLL for overriding malloc +`mimalloc-override-dll` project builds a DLL for overriding malloc in the entire program. ## Linux, macOS, BSD, etc. From 9f5a2969b801bee83716f1436fb08bd6c6099b11 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Apr 2025 12:08:26 -0700 Subject: [PATCH 293/352] clarify v3 tag --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 87cee98d..5a495275 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3-beta`: Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. 
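The NUMA refactor in patches 288-290 above sits behind the public reservation call that patch 290 touches. A minimal usage sketch (assuming a system where huge OS pages are available; each reserved page is 1 GiB):

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // reserve 8 huge OS pages, spread evenly over all detected NUMA nodes
  // (numa_nodes == 0), waiting at most 10 seconds in total
  int err = mi_reserve_huge_os_pages_interleave(8, 0, 10000 /* timeout in msec */);
  if (err != 0) {
    fprintf(stderr, "unable to reserve huge OS pages (error %d)\n", err);
  }
  return 0;
}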
From bc8eca8bf2641f12ecc23d7527aecdb62d6d2939 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Apr 2025 12:09:09 -0700 Subject: [PATCH 294/352] typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 70a25fc1..cee78898 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. From af21001f7a65eafb8fb16460b018ebf9d75e2ad8 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Apr 2025 12:17:45 -0700 Subject: [PATCH 295/352] clarify to use as the PR branch --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 5a495275..8b2ada1f 100644 --- a/readme.md +++ b/readme.md @@ -73,7 +73,7 @@ Enjoy! ### Branches * `master`: latest stable release (still based on `dev2`). -* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. +* `dev`: development branch for mimalloc v1. **Use this branch for submitting PR's**. * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. From 951538d469f72532e43a2437d556a73518f5dde4 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 8 Apr 2025 13:56:31 -0700 Subject: [PATCH 296/352] fix prctl.h includes for alpine linux/musl (hopefully fixes #1065, #1066, #1067) --- src/prim/unix/prim.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 09aa91b5..e3888e73 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -32,6 +32,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(__linux__) #include #include // THP disable, PR_SET_VMA + #if !defined(PR_SET_VMA) + #include + #endif #if defined(__GLIBC__) #include // linux mmap flags #else From 7543e8989abe41e87f260424b3711931d680da77 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 13 Apr 2025 19:49:47 -0700 Subject: [PATCH 297/352] validate pointer before assertion in mi_free_size (issue #754) --- src/free.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/free.c b/src/free.c index 7e529530..22284135 100644 --- a/src/free.c +++ b/src/free.c @@ -340,7 +340,11 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); + #if MI_DEBUG + mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free_size"); + if mi_unlikely(segment==NULL) return; mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + #endif mi_free(p); } From fae61ed946ceaf88f8e902aa596bb46305b531d6 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 13 Apr 2025 19:56:49 -0700 Subject: [PATCH 298/352] fix assertion in mi_free_size (issue #754) --- src/free.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/free.c b/src/free.c index 22284135..d0940a16 100644 --- a/src/free.c +++ b/src/free.c @@ -341,9 +341,8 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); #if MI_DEBUG - mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free_size"); - if mi_unlikely(segment==NULL) return; - mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + const size_t available = _mi_usable_size(p,"mi_free_size"); + mi_assert(p == NULL || size <= available || available == 0 /* invalid pointer */ ); #endif mi_free(p); } From aad0bc2ae3bc498b8e405d3f89be90c22abe76d8 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 2 May 2025 08:09:40 -0700 Subject: [PATCH 299/352] fix cast on msvc --- include/mimalloc/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 51fad09c..6283f1d1 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -855,7 +855,7 @@ static inline size_t mi_clz(size_t x) { #else _BitScanReverse64(&idx, x); #endif - return ((MI_SIZE_BITS - 1) - idx); + return ((MI_SIZE_BITS - 1) - (size_t)idx); } static inline size_t mi_ctz(size_t x) { if (x==0) return MI_SIZE_BITS; @@ -865,7 +865,7 @@ static inline size_t mi_ctz(size_t x) { #else _BitScanForward64(&idx, x); #endif - return idx; + return (size_t)idx; } #else From 6bfb1c656c6da4eaa8dab4d8fc0197f1ed946483 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 08:40:21 -0700 Subject: [PATCH 300/352] allow size==0 for mi_prim_free (issue #1041) --- src/os.c | 14 +++++++------- src/prim/emscripten/prim.c | 4 ++-- src/prim/unix/prim.c | 5 +++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/os.c b/src/os.c index 894e3a45..be7e532c 100644 --- a/src/os.c +++ b/src/os.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
@@ -167,8 +167,8 @@ static void mi_os_free_huge_os_pages(void* p, size_t size); static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { mi_assert_internal((size % _mi_os_page_size()) == 0); - if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) - int err = _mi_prim_free(addr, size); + if (addr == NULL) return; // || _mi_os_is_huge_reserved(addr) + int err = _mi_prim_free(addr, size); // allow size==0 (issue #1041) if (err != 0) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } @@ -186,10 +186,10 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me void* base = addr; // different base? (due to alignment) if (memid.mem.os.base != base) { - mi_assert(memid.mem.os.base <= addr); + mi_assert(memid.mem.os.base <= addr); base = memid.mem.os.base; const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base; - if (memid.mem.os.size==0) { + if (memid.mem.os.size==0) { csize += diff; } if (still_committed) { @@ -733,8 +733,8 @@ static int mi_os_numa_node_get(void) { } int _mi_os_numa_node(void) { - if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { - return 0; + if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { + return 0; } else { return mi_os_numa_node_get(); diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index 82147de7..a8677cbc 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen, Alon Zakai This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -58,7 +58,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config) { extern void emmalloc_free(void*); int _mi_prim_free(void* addr, size_t size) { - MI_UNUSED(size); + if (size==0) return 0; emmalloc_free(addr); return 0; } diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index e3888e73..f93e458a 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -70,7 +70,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MADV_FREE POSIX_MADV_FREE #endif - + //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. // and do allocation themselves; using syscalls prevents recursion when mimalloc is @@ -186,6 +186,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) //--------------------------------------------- int _mi_prim_free(void* addr, size_t size ) { + if (size==0) return 0; bool err = (munmap(addr, size) == -1); return (err ? 
errno : 0); } From 2c34250f43c0c06cdc3c405781eed90daf008361 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 08:55:16 -0700 Subject: [PATCH 301/352] extend override test on windows --- test/main-override-dep.cpp | 38 +++++++++++++++++++++++++++++++++++++- test/main-override.cpp | 5 +++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/test/main-override-dep.cpp b/test/main-override-dep.cpp index e92f6fc4..d89e3fca 100644 --- a/test/main-override-dep.cpp +++ b/test/main-override-dep.cpp @@ -1,6 +1,7 @@ // Issue #981: test overriding allocation in a DLL that is compiled independent of mimalloc. // This is imported by the `mimalloc-test-override` project. #include +#include #include "main-override-dep.h" std::string TestAllocInDll::GetString() @@ -10,6 +11,41 @@ std::string TestAllocInDll::GetString() const char* t = "test"; memcpy(test, t, 4); std::string r = test; + std::cout << "override-dep: GetString: " << r << "\n"; delete[] test; return r; -} \ No newline at end of file +} + + +class Static { +private: + void* p; +public: + Static() { + printf("override-dep: static constructor\n"); + p = malloc(64); + return; + } + ~Static() { + free(p); + printf("override-dep: static destructor\n"); + return; + } +}; + +static Static s = Static(); + + +#include + +BOOL WINAPI DllMain(HINSTANCE module, DWORD reason, LPVOID reserved) { + (void)(reserved); + (void)(module); + if (reason==DLL_PROCESS_ATTACH) { + printf("override-dep: dll attach\n"); + } + else if (reason==DLL_PROCESS_DETACH) { + printf("override-dep: dll detach\n"); + } + return TRUE; +} diff --git a/test/main-override.cpp b/test/main-override.cpp index db594acc..576f47bc 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -37,7 +37,7 @@ static void test_thread_local(); // issue #944 static void test_mixed1(); // issue #942 static void test_stl_allocators(); -#if x_WIN32 +#if _WIN32 #include "main-override-dep.h" static void test_dep(); // issue #981: test overriding in another DLL #else @@ -145,11 +145,12 @@ static bool test_stl_allocator1() { struct some_struct { int i; int j; double z; }; -#if x_WIN32 +#if _WIN32 static void test_dep() { TestAllocInDll t; std::string s = t.GetString(); + std::cout << "test_dep GetString: " << s << "\n"; } #endif From f3e92b1edd851a4d1e2e2c4bbada87f2855dc834 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 16:12:20 -0700 Subject: [PATCH 302/352] Use second user TLS slot to avoid using reserved fields in the TEB (issue #1078) --- include/mimalloc/prim.h | 15 ++++++++------- src/prim/windows/prim.c | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index bddd66e9..b0ddc2d0 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,19 +208,20 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. This uses a fixed location -// in the TCB though (last user-reserved slot by default) which may clash with other applications. - +// thread-local initialization checks in the fast path. +// We always use the second user TLS slot (the first one is always allocated already), +// and at initialization (`windows/prim.c`) we call TlsAlloc and verify +// we indeed get the second slot (and fail otherwise). 
+// Todo: we could make the Tls slot completely dynamic but that would require +// an extra read of the static Tls slot instead of using a constant offset. #define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) #if MI_WIN_USE_FIXED_TLS > 1 #define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) #elif MI_SIZE_SIZE == 4 -#define MI_TLS_SLOT (0x710) // Last user-reserved slot -// #define MI_TLS_SLOT (0xF0C) // Last TlsSlot (might clash with other app reserved slot) +#define MI_TLS_SLOT (0x0E18) // Second User TLS slot #else -#define MI_TLS_SLOT (0x888) // Last user-reserved slot -// #define MI_TLS_SLOT (0x1678) // Last TlsSlot (might clash with other app reserved slot) +#define MI_TLS_SLOT (0x1488) // Second User TLS slot #endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index a080f4bc..7daa09ef 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -631,8 +631,20 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); #if MI_TLS_SLOT >= 2 - if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + if (reason==DLL_PROCESS_ATTACH) { + const DWORD tls_slot = TlsAlloc(); + if (tls_slot != 1) { + _mi_error_message(EFAULT, "unable to allocate the second TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + } + } + if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { + if (mi_prim_get_default_heap() == NULL) { + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + } + #if MI_DEBUG + void* const p = TlsGetValue(1); + mi_assert_internal(p == (void*)&_mi_heap_empty); + #endif } #endif if (reason==DLL_PROCESS_ATTACH) { From 417e8176bddce86c2d53656c5552f5bb96304c46 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 16:27:00 -0700 Subject: [PATCH 303/352] add unix large page size constant and adjust aligment to the large page size for large allocations --- src/prim/unix/prim.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index f93e458a..f3ccb013 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -70,6 +70,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MADV_FREE POSIX_MADV_FREE #endif +#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this? //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. @@ -156,7 +157,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) } #endif } - config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? + config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE; config->has_overcommit = unix_detect_overcommit(); config->has_partial_free = true; // mmap can free in parts config->has_virtual_reserve = true; // todo: check if this true for NetBSD? 
(for anonymous mmap with PROT_NONE) @@ -386,6 +387,9 @@ int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool comm mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); + if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) { + try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations + } *is_zero = true; int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); @@ -433,7 +437,7 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = unix_madvise(start, size, MADV_DONTNEED); - #if !MI_DEBUG && !MI_SECURE + #if !MI_DEBUG && MI_SECURE<=2 *needs_recommit = false; #else *needs_recommit = true; From e4c5d09d65ff7743fe5e5dfadd6f082e839ff791 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:04:57 -0700 Subject: [PATCH 304/352] improve TLS access on Windows with msvc (by Frank Richter, issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index b0ddc2d0..a722d721 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. +// thread-local initialization checks in the fast path. // We always use the second user TLS slot (the first one is always allocated already), // and at initialization (`windows/prim.c`) we call TlsAlloc and verify // we indeed get the second slot (and fail otherwise). @@ -270,6 +270,9 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly +#ifdef _MSC_VER +__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) +#endif extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern bool _mi_process_is_initialized; // has mi_process_init been called? 
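Patch 302 above relies on the second user TLS slot living at a fixed offset in the TEB. A standalone sketch of that assumption (Windows x64, MSVC; it assumes the conventional TEB layout where the user TLS slot array starts at gs:[0x1480], so slot 1 is gs:[0x1488] -- the same constant the patch uses for MI_TLS_SLOT):

#include <windows.h>
#include <intrin.h>
#include <stdio.h>

int main(void) {
  // in a freshly started process the first TlsAlloc() normally returns slot 1;
  // this mirrors the check windows/prim.c performs at DLL_PROCESS_ATTACH
  DWORD slot = TlsAlloc();
  printf("allocated TLS slot: %lu\n", slot);
  if (slot != 1) return 1;                      // some other module grabbed it first; bail out
  TlsSetValue(slot, (LPVOID)0x1234);
  void* fast = (void*)__readgsqword(0x1488);    // direct TEB read, no API call
  printf("TlsGetValue: %p, fixed-offset read: %p\n", TlsGetValue(slot), fast);
  return 0;
}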
From f989a1cbb9b63043f1e56d248efe1ede9a6651d7 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:10:38 -0700 Subject: [PATCH 305/352] add more decl_hidden specifiers on extern variables to improve access on arm64 --- include/mimalloc/internal.h | 6 +++--- include/mimalloc/prim.h | 6 +++--- src/page.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 51fad09c..c9362aa0 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -96,7 +96,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; +extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); @@ -958,8 +958,8 @@ static inline size_t mi_popcount(size_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -extern bool _mi_cpu_has_fsrm; -extern bool _mi_cpu_has_erms; +extern mi_decl_hidden bool _mi_cpu_has_fsrm; +extern mi_decl_hidden bool _mi_cpu_has_erms; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index a722d721..527bb97a 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -273,8 +273,8 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #ifdef _MSC_VER __declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) #endif -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; @@ -402,7 +402,7 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) { #elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; +extern mi_decl_hidden pthread_key_t _mi_heap_default_key; static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); diff --git a/src/page.c b/src/page.c index 6a693e89..55150f33 100644 --- a/src/page.c +++ b/src/page.c @@ -114,7 +114,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? 
bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); @@ -979,9 +979,9 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // free delayed frees from other threads (but skip contended ones) _mi_heap_delayed_free_partial(heap); - + // collect every once in a while (10000 by default) - const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); + const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); if (heap->generic_collect_count >= generic_collect) { heap->generic_collect_count = 0; mi_heap_collect(heap, false /* force? */); From 9194362e4858bdd2eaf1b1cb9075abaa6ace2460 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:04:57 -0700 Subject: [PATCH 306/352] improve TLS access on Windows with msvc (by Frank Richter, issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index b0ddc2d0..a722d721 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. +// thread-local initialization checks in the fast path. // We always use the second user TLS slot (the first one is always allocated already), // and at initialization (`windows/prim.c`) we call TlsAlloc and verify // we indeed get the second slot (and fail otherwise). @@ -270,6 +270,9 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly +#ifdef _MSC_VER +__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) +#endif extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern bool _mi_process_is_initialized; // has mi_process_init been called? 
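A note on the mi_decl_hidden additions in the surrounding patches (an illustrative sketch, not part of the patch series): when mimalloc is built as a shared library with -fPIC, an extern variable with default visibility can be interposed at load time, so the compiler must first load its address from the GOT and only then load the value. Hidden visibility promises the definition lives in the same shared object, which lets arm64 (and other PC-relative targets) address it directly and drop one load on hot paths such as the _mi_cpu_has_fsrm check in _mi_memcpy. A minimal sketch with made-up variable names; compile with gcc -O2 -fPIC -c on arm64 and compare the two functions:

#include <stdbool.h>

extern bool flag_default;                         // default visibility: the address is fetched from
                                                  //   the GOT first (roughly adrp+ldr of the GOT entry,
bool read_default(void) { return flag_default; }  //   then ldrb of the value)

extern __attribute__((visibility("hidden"))) bool flag_hidden;  // hidden: cannot be interposed, so the
                                                                //   value is loaded PC-relative in one
bool read_hidden(void)  { return flag_hidden; }                 //   step (roughly adrp + ldrb)
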
From 9c24c428cb06c735ccc3dcca996c2d09bb139d08 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:10:38 -0700 Subject: [PATCH 307/352] add more decl_hidden specifiers on extern variables to improve access on arm64 --- include/mimalloc/internal.h | 6 +++--- include/mimalloc/prim.h | 6 +++--- src/page.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 6283f1d1..b11bd357 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -96,7 +96,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; +extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); @@ -958,8 +958,8 @@ static inline size_t mi_popcount(size_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -extern bool _mi_cpu_has_fsrm; -extern bool _mi_cpu_has_erms; +extern mi_decl_hidden bool _mi_cpu_has_fsrm; +extern mi_decl_hidden bool _mi_cpu_has_erms; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index a722d721..527bb97a 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -273,8 +273,8 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #ifdef _MSC_VER __declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) #endif -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; @@ -402,7 +402,7 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) { #elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; +extern mi_decl_hidden pthread_key_t _mi_heap_default_key; static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); diff --git a/src/page.c b/src/page.c index 6a693e89..55150f33 100644 --- a/src/page.c +++ b/src/page.c @@ -114,7 +114,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? 
bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); @@ -979,9 +979,9 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // free delayed frees from other threads (but skip contended ones) _mi_heap_delayed_free_partial(heap); - + // collect every once in a while (10000 by default) - const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); + const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); if (heap->generic_collect_count >= generic_collect) { heap->generic_collect_count = 0; mi_heap_collect(heap, false /* force? */); From 63b0989df57a9dd2b867920307b0d038df695a54 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 4 May 2025 21:41:26 -0700 Subject: [PATCH 308/352] revert use of selectany for msvc (issue #1078) --- include/mimalloc/prim.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 527bb97a..2d508148 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -270,9 +270,6 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly -#ifdef _MSC_VER -__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) -#endif extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? From 52b75693c48308e8b19b94ffa7fbc0580021ba87 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 22:03:10 -0700 Subject: [PATCH 309/352] use TlsAlloc with a dynamic offset for MI_WIN_USE_FIXED_TLS by default (issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 13 ++++++------- src/prim/windows/prim.c | 15 ++++++++++----- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index 128a4ff6..d6af71ce 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 2d508148..60af4d59 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -209,19 +209,18 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // On windows we can store the thread-local heap at a fixed TLS slot to avoid // thread-local initialization checks in the fast path. -// We always use the second user TLS slot (the first one is always allocated already), -// and at initialization (`windows/prim.c`) we call TlsAlloc and verify -// we indeed get the second slot (and fail otherwise). -// Todo: we could make the Tls slot completely dynamic but that would require -// an extra read of the static Tls slot instead of using a constant offset. +// We allocate a user TLS slot at process initialization (see `windows/prim.c`) +// and store the offset `_mi_win_tls_offset`. 
#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) +extern mi_decl_hidden size_t _mi_win_tls_offset; + #if MI_WIN_USE_FIXED_TLS > 1 #define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) #elif MI_SIZE_SIZE == 4 -#define MI_TLS_SLOT (0x0E18) // Second User TLS slot +#define MI_TLS_SLOT (0x0E10 + _mi_win_tls_offset) // User TLS slots #else -#define MI_TLS_SLOT (0x1488) // Second User TLS slot +#define MI_TLS_SLOT (0x1480 + _mi_win_tls_offset) // User TLS slots #endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 7daa09ef..c91102a2 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -627,22 +627,27 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Process & Thread Init/Done //---------------------------------------------------------------- +#if MI_HAS_TLS_SLOT +mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default +#endif + static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); - #if MI_TLS_SLOT >= 2 + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if (reason==DLL_PROCESS_ATTACH) { const DWORD tls_slot = TlsAlloc(); - if (tls_slot != 1) { - _mi_error_message(EFAULT, "unable to allocate the second TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); } + _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } #if MI_DEBUG - void* const p = TlsGetValue(1); + void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } @@ -808,7 +813,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain - #if MI_TLS_SLOT >= 2 + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } From 303b196d403876f324e7456854a148e85682c2d9 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 22:09:56 -0700 Subject: [PATCH 310/352] fix MI_WIN_USE_FIXED_TLS conditions --- src/prim/windows/prim.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index c91102a2..d0fee4c2 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -627,7 +627,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Process & Thread Init/Done //---------------------------------------------------------------- -#if MI_HAS_TLS_SLOT +#if MI_WIN_USE_FIXED_TLS==1 mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default #endif @@ -635,6 +635,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation + #if MI_WIN_USE_FIXED_TLS==1 if (reason==DLL_PROCESS_ATTACH) { const DWORD tls_slot 
= TlsAlloc(); if (tls_slot == TLS_OUT_OF_INDEXES) { @@ -642,11 +643,12 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { } _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } + #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } - #if MI_DEBUG + #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); #endif From e2d7c24c7362a19429f7338f0e5ed493f7c1d7b0 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 4 May 2025 22:17:59 -0700 Subject: [PATCH 311/352] add fixed TLS slot test to pipeline on Windows --- azure-pipelines.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 25d4a6e0..b7fc59d4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -42,6 +42,14 @@ jobs: BuildType: release cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 MSBuildConfiguration: Release + Debug Fixed TLS: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Debug + Release Fixed TLS: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Release steps: - task: CMake@1 inputs: From c84d996e884412b1fa58fa48ee6fc6e2fa841446 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 5 May 2025 10:23:52 -0700 Subject: [PATCH 312/352] fix TLS initialization for MI_WIN_USE_FIXED_TLS with redirection --- include/mimalloc/prim.h | 2 +- src/prim/windows/prim.c | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 60af4d59..d3157949 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -119,7 +119,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); - +#define MI_WIN_USE_FIXED_TLS 1 //------------------------------------------------------------------- // Access to TLS (thread local storage) slots. 
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index d0fee4c2..535d34a6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -628,18 +628,16 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { //---------------------------------------------------------------- #if MI_WIN_USE_FIXED_TLS==1 -mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default +mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif -static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { - MI_UNUSED(reserved); - MI_UNUSED(module); +static void mi_win_tls_init(DWORD reason) { #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation #if MI_WIN_USE_FIXED_TLS==1 - if (reason==DLL_PROCESS_ATTACH) { - const DWORD tls_slot = TlsAlloc(); - if (tls_slot == TLS_OUT_OF_INDEXES) { - _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + if (reason==DLL_PROCESS_ATTACH && _mi_win_tls_offset == 0) { + const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); } _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } @@ -653,7 +651,15 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } + #else + MI_UNUSED(reason); #endif +} + +static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { + MI_UNUSED(reserved); + MI_UNUSED(module); + mi_win_tls_init(reason); if (reason==DLL_PROCESS_ATTACH) { _mi_process_load(); } @@ -815,11 +821,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain - #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation - if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); - } - #endif + mi_win_tls_init(reason); if (reason == DLL_PROCESS_ATTACH) { mi_redirected = true; } From 0184a86eaf4cf0018d544e5992b86f5ede688601 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:32:11 -0700 Subject: [PATCH 313/352] add alpine x86 docker file --- contrib/docker/alpine-arm32v7/Dockerfile | 2 +- contrib/docker/alpine-x86/Dockerfile | 28 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 contrib/docker/alpine-x86/Dockerfile diff --git a/contrib/docker/alpine-arm32v7/Dockerfile b/contrib/docker/alpine-arm32v7/Dockerfile index f74934fb..daa60f50 100644 --- a/contrib/docker/alpine-arm32v7/Dockerfile +++ b/contrib/docker/alpine-arm32v7/Dockerfile @@ -1,6 +1,6 @@ # install from an image # download first an appropriate tar.gz image into the current directory -# from: +# from FROM scratch # Substitute the image name that was downloaded diff --git a/contrib/docker/alpine-x86/Dockerfile b/contrib/docker/alpine-x86/Dockerfile new file mode 100644 index 00000000..a0f76c17 --- /dev/null +++ b/contrib/docker/alpine-x86/Dockerfile @@ -0,0 +1,28 @@ +# install from an image +# download first an appropriate tar.gz image into the current directory +# from +FROM scratch + +# Substitute the image name that was downloaded +ADD alpine-minirootfs-20250108-x86.tar.gz / + +# Install tools +RUN apk 
add build-base make cmake +RUN apk add git +RUN apk add vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev2 +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. -DMI_DEBUG_FULL=ON +# RUN make -j +# RUN make test + +CMD ["/bin/sh"] From 341149391fee496790a7fa916b1fd3fdd0cce1a1 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:33:29 -0700 Subject: [PATCH 314/352] fix include of prctl.h on alpine linux x86 --- src/prim/unix/prim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index f3ccb013..a90fa659 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -32,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__linux__) #include #include // THP disable, PR_SET_VMA - #if !defined(PR_SET_VMA) + #if defined(__GLIBC__) && !defined(PR_SET_VMA) #include #endif #if defined(__GLIBC__) From a6ecb5c299e65eb7dd6602b97235126acc01a868 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:35:29 -0700 Subject: [PATCH 315/352] fix format specifier (for alpine linux x86, issue #1086) --- src/arena.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/arena.c b/src/arena.c index bdae8da1..aa01ffcb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -44,7 +44,7 @@ typedef struct mi_arena_s { mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited _Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be purged from `blocks_purge`. - + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) @@ -365,7 +365,7 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t *arena_id) { if (_mi_preloading()) return false; // use OS only while pre loading - + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); if (arena_count > (MI_MAX_ARENAS - 4)) return false; @@ -407,7 +407,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (!mi_option_is_enabled(mi_option_disallow_arena_alloc)) { // is arena allocation allowed? - if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid); if (p != NULL) return p; @@ -487,7 +487,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks) // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory). mi_assert_internal(already_committed < blocks); mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? 
*/, mi_arena_block_size(already_committed)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed)); } // clear the purged blocks @@ -556,7 +556,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) { // check pre-conditions if (arena->memid.is_pinned) return false; - + // expired yet? mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); if (!force && (expire == 0 || expire > now)) return false; @@ -611,7 +611,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) return any_purged; } -static void mi_arenas_try_purge( bool force, bool visit_all ) +static void mi_arenas_try_purge( bool force, bool visit_all ) { if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled @@ -628,7 +628,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all ) mi_atomic_guard(&purge_guard) { // increase global expire: at most one purge per delay cycle - mi_atomic_storei64_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay()); + mi_atomic_storei64_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay()); size_t max_purge_count = (visit_all ? max_arena : 2); bool all_visited = true; for (size_t i = 0; i < max_arena; i++) { @@ -947,7 +947,7 @@ void mi_debug_show_arenas(void) mi_attr_noexcept { for (size_t i = 0; i < max_arenas; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena == NULL) break; - _mi_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, MI_ARENA_BLOCK_SIZE / MI_MiB, arena->field_count, (arena->memid.is_pinned ? ", pinned" : "")); + _mi_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, (size_t)(MI_ARENA_BLOCK_SIZE / MI_MiB), arena->field_count, (arena->memid.is_pinned ? ", pinned" : "")); if (show_inuse) { inuse_total += mi_debug_show_bitmap(" ", "inuse blocks", arena->block_count, arena->blocks_inuse, arena->field_count); } From 72f05e2f076b3e1b160b8aaca7bc220a2532ced0 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:58:45 -0700 Subject: [PATCH 316/352] fix guarded sample rate of 1 (issue #1085) --- include/mimalloc/types.h | 1 - src/init.c | 17 ++++++++--------- test/main-override-static.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ab697f23..e2b5d318 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -512,7 +512,6 @@ struct mi_heap_s { size_t guarded_size_min; // minimal size for guarded objects size_t guarded_size_max; // maximal size for guarded objects size_t guarded_sample_rate; // sample rate (set to 0 to disable guarded pages) - size_t guarded_sample_seed; // starting sample count size_t guarded_sample_count; // current sample count (counting down to 0) #endif mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. 
diff --git a/src/init.c b/src/init.c index 8a48ae5e..fe0acd8a 100644 --- a/src/init.c +++ b/src/init.c @@ -110,7 +110,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { false, // can reclaim 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) + 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY @@ -153,7 +153,7 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = { false, // can reclaim 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 0, + 0, 0, 0, 0, #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY @@ -165,15 +165,14 @@ mi_stats_t _mi_stats_main = { MI_STAT_VERSION, MI_STATS_NULL }; #if MI_GUARDED mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) { - heap->guarded_sample_seed = seed; - if (heap->guarded_sample_seed == 0) { - heap->guarded_sample_seed = _mi_heap_random_next(heap); - } heap->guarded_sample_rate = sample_rate; - if (heap->guarded_sample_rate >= 1) { - heap->guarded_sample_seed = heap->guarded_sample_seed % heap->guarded_sample_rate; + heap->guarded_sample_count = sample_rate; // count down samples + if (heap->guarded_sample_rate > 1) { + if (seed == 0) { + seed = _mi_heap_random_next(heap); + } + heap->guarded_sample_count = (seed % heap->guarded_sample_rate) + 1; // start at random count between 1 and `sample_rate` } - heap->guarded_sample_count = heap->guarded_sample_seed; // count down samples } mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) { diff --git a/test/main-override-static.c b/test/main-override-static.c index 06d7baa5..c94b98f4 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -43,7 +43,7 @@ int main() { // corrupt_free(); // block_overflow1(); // block_overflow2(); - // test_canary_leak(); + test_canary_leak(); // test_aslr(); // invalid_free(); // test_reserved(); From 0ae310327f83abd3b354bf03b819f3595be0daf2 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 16:22:08 -0700 Subject: [PATCH 317/352] fix debug assertion for windows TLS --- src/prim/windows/prim.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 535d34a6..6ab715e6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -644,12 +644,12 @@ static void mi_win_tls_init(DWORD reason) { #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 + void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); + mi_assert_internal(p == (void*)&_mi_heap_empty); + #endif } - #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 - void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); - mi_assert_internal(p == (void*)&_mi_heap_empty); - #endif } #else MI_UNUSED(reason); From df3e1916209b3783bb3d001013ce8fbba4815da6 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 16:38:53 -0700 Subject: [PATCH 318/352] make windows fixed TLS opt-in --- include/mimalloc/prim.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index d3157949..fbf0cc74 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -118,9 +118,6 @@ void 
_mi_prim_thread_done_auto_done(void); void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); - -#define MI_WIN_USE_FIXED_TLS 1 - //------------------------------------------------------------------- // Access to TLS (thread local storage) slots. // We need fast access to both a unique thread id (in `free.c:mi_free`) and From 41cc1bfe5199fbfc4dc5e7c7ecb1453ad4e8ad7b Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 17:38:16 -0700 Subject: [PATCH 319/352] add guarded TLS test for Windows fixed TLS --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 +-- include/mimalloc/prim.h | 2 +- src/init.c | 1 - src/prim/windows/prim.c | 33 ++++++++++++++----------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index fbf0cc74..3d8f1806 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // thread-local initialization checks in the fast path. // We allocate a user TLS slot at process initialization (see `windows/prim.c`) // and store the offset `_mi_win_tls_offset`. -#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) +#define MI_HAS_TLS_SLOT 1 // 2 = we can reliably initialize the slot (saving a test on each malloc) extern mi_decl_hidden size_t _mi_win_tls_offset; diff --git a/src/init.c b/src/init.c index fe0acd8a..ff6c5d29 100644 --- a/src/init.c +++ b/src/init.c @@ -225,7 +225,6 @@ mi_heap_t* _mi_heap_main_get(void) { return &_mi_heap_main; } - /* ----------------------------------------------------------- Sub process ----------------------------------------------------------- */ diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 6ab715e6..9ffacaa3 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -631,18 +631,23 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif +//static void mi_debug_out(const char* s) { +// HANDLE h = GetStdHandle(STD_ERROR_HANDLE); +// WriteConsole(h, s, (DWORD)_mi_strlen(s), NULL, NULL); +//} + static void mi_win_tls_init(DWORD reason) { - #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation - #if MI_WIN_USE_FIXED_TLS==1 - if (reason==DLL_PROCESS_ATTACH && _mi_win_tls_offset == 0) { - const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 - if (tls_slot == TLS_OUT_OF_INDEXES) { - _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); - } - _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); - } - #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { + #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically + if (_mi_win_tls_offset == 0 && reason=DLL_PROCESS_ATTACH) { + const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + } + _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); + } + #endif + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS 
slot before any allocation if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 @@ -650,10 +655,8 @@ static void mi_win_tls_init(DWORD reason) { mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } - } - #else - MI_UNUSED(reason); - #endif + #endif + } } static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { @@ -676,7 +679,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #define MI_PRIM_HAS_PROCESS_ATTACH 1 // Windows DLL: easy to hook into process_init and thread_done - BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { mi_win_main((PVOID)inst,reason,reserved); return TRUE; } From 15c917ef1522957a260686cf027a9f294ba1c5cd Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 17:45:10 -0700 Subject: [PATCH 320/352] fix syntax error --- src/prim/windows/prim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 9ffacaa3..b82918c1 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -639,7 +639,7 @@ mi_decl_cache_align size_t _mi_win_tls_offset = 0; static void mi_win_tls_init(DWORD reason) { if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically - if (_mi_win_tls_offset == 0 && reason=DLL_PROCESS_ATTACH) { + if (_mi_win_tls_offset == 0 && reason == DLL_PROCESS_ATTACH) { const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 if (tls_slot == TLS_OUT_OF_INDEXES) { _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); From 08c33768a5344e43a1ba95b88c3adcbb6a5c3498 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 11:09:34 -0700 Subject: [PATCH 321/352] fix stats for pages and page_bins --- include/mimalloc/internal.h | 2 ++ src/heap.c | 4 +--- src/page-queue.c | 4 ++-- src/page.c | 5 ++--- src/segment.c | 1 + src/stats.c | 6 +++++- test/test-stress.c | 1 + 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b11bd357..e277f0ff 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -217,6 +217,7 @@ void _mi_deferred_free(mi_heap_t* heap, bool force); void _mi_page_free_collect(mi_page_t* page,bool force); void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments +size_t _mi_page_bin(const mi_page_t* page); // for stats size_t _mi_bin_size(size_t bin); // for stats size_t _mi_bin(size_t size); // for stats @@ -233,6 +234,7 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* pa // "stats.c" void _mi_stats_done(mi_stats_t* stats); +void _mi_stats_merge_thread(mi_tld_t* tld); mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/src/heap.c b/src/heap.c index 0ea9a2ff..118438b9 100644 --- a/src/heap.c +++ b/src/heap.c @@ -169,9 +169,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_arenas_collect(collect == MI_FORCE /* force purge? 
*/); // merge statistics - if (collect <= MI_FORCE) { - mi_stats_merge(); - } + if (collect <= MI_FORCE) { _mi_stats_merge_thread(heap->tld); } } void _mi_heap_collect_abandon(mi_heap_t* heap) { diff --git a/src/page-queue.c b/src/page-queue.c index 3507505d..38b9aff4 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -136,7 +136,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* } #endif -static size_t mi_page_bin(const mi_page_t* page) { +size_t _mi_page_bin(const mi_page_t* page) { const size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); mi_assert_internal(bin <= MI_BIN_FULL); return bin; @@ -144,7 +144,7 @@ static size_t mi_page_bin(const mi_page_t* page) { static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { mi_assert_internal(heap!=NULL); - const size_t bin = mi_page_bin(page); + const size_t bin = _mi_page_bin(page); mi_page_queue_t* pq = &heap->pages[bin]; mi_assert_internal((mi_page_block_size(page) == pq->block_size) || (mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) || diff --git a/src/page.c b/src/page.c index 55150f33..89acb409 100644 --- a/src/page.c +++ b/src/page.c @@ -290,7 +290,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_assert_internal(full_block_size >= block_size); mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); - mi_heap_stat_increase(heap, page_bins[mi_page_bin(page)], 1); + mi_heap_stat_increase(heap, page_bins[_mi_page_bin(page)], 1); if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); return page; @@ -443,8 +443,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_segments_tld_t* segments_tld = &heap->tld->segments; mi_page_queue_remove(pq, page); - // and free it - mi_heap_stat_decrease(heap, page_bins[mi_page_bin(page)], 1); + // and free it mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } diff --git a/src/segment.c b/src/segment.c index 75f8dacb..708ddd00 100644 --- a/src/segment.c +++ b/src/segment.c @@ -718,6 +718,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg size_t inuse = page->capacity * mi_page_block_size(page); _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_bins[_mi_page_bin(page)], 1); page->is_zero_init = false; page->segment_in_use = false; diff --git a/src/stats.c b/src/stats.c index 92bc049c..ec8b65a3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -395,6 +395,10 @@ void mi_stats_merge(void) mi_attr_noexcept { mi_stats_merge_from( mi_stats_get_default() ); } +void _mi_stats_merge_thread(mi_tld_t* tld) { + mi_stats_merge_from( &tld->stats ); +} + void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` mi_stats_merge_from(stats); } @@ -498,7 +502,7 @@ static bool mi_heap_buf_expand(mi_heap_buf_t* hbuf) { hbuf->buf[hbuf->size-1] = 0; } if (hbuf->size > SIZE_MAX/2 || !hbuf->can_realloc) return false; - const size_t newsize = (hbuf->size == 0 ? 2*MI_KiB : 2*hbuf->size); + const size_t newsize = (hbuf->size == 0 ? 
mi_good_size(12*MI_KiB) : 2*hbuf->size); char* const newbuf = (char*)mi_rezalloc(hbuf->buf, newsize); if (newbuf == NULL) return false; hbuf->buf = newbuf; diff --git a/test/test-stress.c b/test/test-stress.c index 9e041064..1abe56d2 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -337,6 +337,7 @@ int main(int argc, char** argv) { mi_free(json); } #endif + mi_collect(true); mi_stats_print(NULL); #endif //bench_end_program(); From 6cb4861f3eb4757ad4d1f1b0ef6aca793244381e Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 17:36:31 -0700 Subject: [PATCH 322/352] fix format specifier for numa nodes --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index ec8b65a3..89d0e103 100644 --- a/src/stats.c +++ b/src/stats.c @@ -348,7 +348,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->malloc_guarded_count, "guarded", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->page_searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); + _mi_fprintf(out, arg, "%10s: %5i\n", "numa nodes", _mi_os_numa_node_count()); size_t elapsed; size_t user_time; From 44e370bdaaaf2ba9d062113ba3902624a29eaa25 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 19:20:31 -0700 Subject: [PATCH 323/352] fix format specifier in stat output --- src/stats.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index 89d0e103..03eafb66 100644 --- a/src/stats.c +++ b/src/stats.c @@ -359,9 +359,9 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) size_t peak_commit; size_t page_faults; mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", - user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); + _mi_fprintf(out, arg, "%10s: %5zu.%03zu s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, arg, "%10s: user: %zu.%03zu s, system: %zu.%03zu s, faults: %zu, rss: ", "process", + user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { _mi_fprintf(out, arg, ", commit: "); From 3e32b4c38563b6d037d8289cc0a73ea694f88ff1 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 28 May 2025 08:37:34 -0700 Subject: [PATCH 324/352] fix OS allocation size tracking in the memid --- include/mimalloc/internal.h | 4 +++- src/os.c | 11 +++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e277f0ff..2e770943 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -784,8 +784,10 @@ static inline mi_memid_t _mi_memid_none(void) { return _mi_memid_create(MI_MEM_NONE); } -static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { +static inline mi_memid_t _mi_memid_create_os(void* base, size_t size, bool committed, bool is_zero, bool is_large) { mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.mem.os.base = base; + memid.mem.os.size = size; memid.initially_committed = committed; memid.initially_zero = is_zero; 
memid.is_pinned = is_large; diff --git a/src/os.c b/src/os.c index be7e532c..d134feba 100644 --- a/src/os.c +++ b/src/os.c @@ -339,7 +339,7 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) { bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); if (p != NULL) { - *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); + *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); } return p; } @@ -357,10 +357,9 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base ); if (p != NULL) { - *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); memid->mem.os.base = os_base; - // memid->mem.os.alignment = alignment; - memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? } return p; } @@ -618,7 +617,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; - uint8_t* start = mi_os_claim_huge_pages(pages, &size); + uint8_t* const start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously @@ -674,7 +673,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } if (page != 0) { mi_assert(start != NULL); - *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); + *memid = _mi_memid_create_os(start, size, true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; mi_assert(memid->is_pinned); #ifdef MI_TRACK_ASAN From b2637835826108d6aeeea295119971e5d542b0d7 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 29 May 2025 12:12:02 -0700 Subject: [PATCH 325/352] update readme --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 8b2ada1f..fd97c28c 100644 --- a/readme.md +++ b/readme.md @@ -85,7 +85,7 @@ Enjoy! ### Releases * 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: - fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, + fix arm32 pre-v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`. 
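A worked example for the memid bookkeeping introduced in the "fix OS allocation size tracking" patch above (illustrative only; the addresses are made up): an aligned OS allocation may over-allocate, return an aligned interior pointer, and record the original base and total length in the memid so that the eventual free releases the whole mapping:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uintptr_t os_base   = 0x12347000;       // what the OS happened to return (not aligned)
  const size_t    alignment = 0x10000;          // 64 KiB requested alignment
  const size_t    size      = 4u << 20;         // 4 MiB requested size
  const uintptr_t p = (os_base + alignment - 1) & ~(uintptr_t)(alignment - 1);  // aligned user pointer
  const size_t    os_size   = size + (size_t)(p - os_base);  // what memid.mem.os.size records
  // _mi_os_free_ex must release [os_base, os_base+os_size), not [p, p+size),
  // otherwise the leading (p - os_base) bytes stay mapped and the OS is given a wrong length.
  printf("adjust = 0x%zx, recorded size = 0x%zx\n", (size_t)(p - os_base), os_size);  // 0x9000, 0x409000
  return 0;
}

The later "fix missing csize assignment in _mi_os_free_ex" patch in this series covers the remaining case where this recorded size is zero and the good-alloc-size fallback was computed but never assigned to csize.
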
From 715acc03294d36c08c8f9f0dc0cbb2a87c320f2b Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 30 May 2025 09:29:35 -0700 Subject: [PATCH 326/352] mark assert_fail as cold and noreturn; move assert to internal.h (see issue #1091, and python/cpython#134586) --- include/mimalloc/internal.h | 94 +++++++++++++++++++++++++++---------- include/mimalloc/types.h | 21 --------- src/options.c | 2 +- 3 files changed, 69 insertions(+), 48 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2e770943..ddd0ba4f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -8,7 +8,6 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H - // -------------------------------------------------------------------------- // This file contains the internal API's of mimalloc and various utility // functions and macros. @@ -17,6 +16,11 @@ terms of the MIT license. A copy of the license can be found in the file #include "types.h" #include "track.h" + +// -------------------------------------------------------------------------- +// Compiler defines +// -------------------------------------------------------------------------- + #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else @@ -30,37 +34,69 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#define mi_decl_noreturn __declspec(noreturn) #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_noreturn __attribute__((noreturn)) #define mi_decl_weak __attribute__((weak)) #define mi_decl_hidden __attribute__((visibility("hidden"))) +#if (__GNUC__ >= 4) || defined(__clang__) +#define mi_decl_cold __attribute__((cold)) +#else +#define mi_decl_cold +#endif #elif __cplusplus >= 201103L // c++11 #define mi_decl_noinline #define mi_decl_thread thread_local #define mi_decl_cache_align alignas(MI_CACHE_LINE) +#define mi_decl_noreturn [[noreturn]] #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align +#define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc #endif #if defined(__EMSCRIPTEN__) && !defined(__wasi__) #define __wasi__ #endif -#if defined(__cplusplus) -#define mi_decl_externc extern "C" -#else -#define mi_decl_externc -#endif + +// -------------------------------------------------------------------------- +// Internal functions +// 
-------------------------------------------------------------------------- // "libc.c" #include @@ -256,26 +292,6 @@ bool _mi_page_is_valid(mi_page_t* page); #endif -// ------------------------------------------------------ -// Branches -// ------------------------------------------------------ - -#if defined(__GNUC__) || defined(__clang__) -#define mi_unlikely(x) (__builtin_expect(!!(x),false)) -#define mi_likely(x) (__builtin_expect(!!(x),true)) -#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) -#define mi_unlikely(x) (x) [[unlikely]] -#define mi_likely(x) (x) [[likely]] -#else -#define mi_unlikely(x) (x) -#define mi_likely(x) (x) -#endif - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - - /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. @@ -300,6 +316,32 @@ bool _mi_page_is_valid(mi_page_t* page); #endif +// ------------------------------------------------------ +// Assertions +// ------------------------------------------------------ + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func) mi_attr_noexcept; +#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + + + /* ----------------------------------------------------------- Inlined definitions ----------------------------------------------------------- */ diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index e2b5d318..855374e5 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -574,7 +574,6 @@ struct mi_tld_s { }; - // ------------------------------------------------------ // Debug // ------------------------------------------------------ @@ -589,26 +588,6 @@ struct mi_tld_s { #define MI_DEBUG_PADDING (0xDE) #endif -#if (MI_DEBUG) -// use our own assertion to print without memory allocation -void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); -#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) -#else -#define mi_assert(x) -#endif - -#if (MI_DEBUG>1) -#define mi_assert_internal mi_assert -#else -#define mi_assert_internal(x) -#endif - -#if (MI_DEBUG>2) -#define mi_assert_expensive mi_assert -#else -#define mi_assert_expensive(x) -#endif - // ------------------------------------------------------ // Statistics diff --git a/src/options.c b/src/options.c index 772dfe66..9bb5d1b3 100644 --- a/src/options.c +++ b/src/options.c @@ -525,7 +525,7 @@ void _mi_warning_message(const char* fmt, ...) 
{ #if MI_DEBUG -void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) mi_attr_noexcept { _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } From e19c0222380a8d53b7d408657dfba0c03d99133c Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 30 May 2025 09:36:38 -0700 Subject: [PATCH 327/352] define mi_decl_align separate from mi_decl_cache_align --- include/mimalloc/internal.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index ddd0ba4f..e951b576 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -27,13 +27,14 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_trace_message(...) #endif -#define MI_CACHE_LINE 64 +#define mi_decl_cache_align mi_decl_align(64) + #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) -#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#define mi_decl_align(a) __declspec(align(a)) #define mi_decl_noreturn __declspec(noreturn) #define mi_decl_weak #define mi_decl_hidden @@ -41,7 +42,7 @@ terms of the MIT license. A copy of the license can be found in the file #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread -#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_align(a) __attribute__((aligned(a))) #define mi_decl_noreturn __attribute__((noreturn)) #define mi_decl_weak __attribute__((weak)) #define mi_decl_hidden __attribute__((visibility("hidden"))) @@ -53,7 +54,7 @@ terms of the MIT license. A copy of the license can be found in the file #elif __cplusplus >= 201103L // c++11 #define mi_decl_noinline #define mi_decl_thread thread_local -#define mi_decl_cache_align alignas(MI_CACHE_LINE) +#define mi_decl_align(a) alignas(a) #define mi_decl_noreturn [[noreturn]] #define mi_decl_weak #define mi_decl_hidden @@ -61,7 +62,7 @@ terms of the MIT license. A copy of the license can be found in the file #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) -#define mi_decl_cache_align +#define mi_decl_align(a) #define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden From 2d16d2ca84a3dd4cca8e32fdfa3819bfa2a684a4 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 30 May 2025 10:08:28 -0700 Subject: [PATCH 328/352] update azure pipeline trigger for the main branch (instead of master) --- azure-pipelines.yml | 6 ++---- readme.md | 7 +++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index df608b17..d761d8c8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,10 +6,8 @@ trigger: branches: include: - - master - - dev3 - - dev2 - - dev + - main + - dev* tags: include: - v* diff --git a/readme.md b/readme.md index fd97c28c..76e2711e 100644 --- a/readme.md +++ b/readme.md @@ -72,15 +72,14 @@ Enjoy! ### Branches -* `master`: latest stable release (still based on `dev2`). 
+* `main`: latest stable release (still based on `dev2`). * `dev`: development branch for mimalloc v1. **Use this branch for submitting PR's**. * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. -* `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version +* `dev3`: development branch for mimalloc v3-beta. This branch is also downstream of `dev`. This version simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use less memory - with less fragmentation. + This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. ### Releases From 1c514847996fba21af276b96d0d748b1ce8f3772 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 11:36:07 -0700 Subject: [PATCH 329/352] fix missing csize assignment in _mi_os_free_ex --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d134feba..dee263bc 100644 --- a/src/os.c +++ b/src/os.c @@ -181,7 +181,7 @@ static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; - if (csize==0) { _mi_os_good_alloc_size(size); } + if (csize==0) { csize = _mi_os_good_alloc_size(size); } size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? (due to alignment) From 60f7e6a6a846dcf9ebe2b29530f0b309d4fd1dd2 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 12:20:02 -0700 Subject: [PATCH 330/352] check all os_commit calls and return NULL on failure --- include/mimalloc/internal.h | 6 +++--- src/os.c | 5 ++++- src/page.c | 26 +++++++++++++++++--------- src/segment.c | 4 +++- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2e770943..e1052787 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -126,13 +126,13 @@ bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero); -bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); -bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); +mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); +mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/src/os.c b/src/os.c index dee263bc..c720c8ad 100644 --- a/src/os.c +++ b/src/os.c @@ -300,7 +300,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only 
the aligned part if (commit) { - _mi_os_commit(p, size, NULL); + if (!_mi_os_commit(p, size, NULL)) { + mi_os_prim_free(p, over_size, 0); + return NULL; + } } } else { // mmap can free inside an allocation diff --git a/src/page.c b/src/page.c index 89acb409..b34ee414 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -630,14 +630,14 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - if (page->free != NULL) return; + if (page->free != NULL) return true; #endif - if (page->capacity >= page->reserved) return; + if (page->capacity >= page->reserved) return true; size_t page_size; //uint8_t* page_start = @@ -673,6 +673,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); + return true; } // Initialize a fresh page @@ -724,8 +725,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,tld); - mi_assert(mi_page_immediate_available(page)); + if (mi_page_extend_free(heap,page,tld)) { + mi_assert(mi_page_immediate_available(page)); + } + return; } @@ -817,9 +820,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page_candidate != NULL) { page = page_candidate; } - if (page != NULL && !mi_page_immediate_available(page)) { - mi_assert_internal(mi_page_is_expandable(page)); - mi_page_extend_free(heap, page, heap->tld); + if (page != NULL) { + if (!mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + if (!mi_page_extend_free(heap, page, heap->tld)) { + page = NULL; // failed to extend + } + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); } if (page == NULL) { diff --git a/src/segment.c b/src/segment.c index 708ddd00..1813a1fc 100644 --- a/src/segment.c +++ b/src/segment.c @@ -182,7 +182,9 @@ static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) static void mi_segment_protect_range(void* p, size_t size, bool protect) { if (protect) { - _mi_os_protect(p, size); + if (!_mi_os_protect(p, size)) { + _mi_error_message(EFAULT,"unable to protect segment memory at %p\n", p); + } } else { _mi_os_unprotect(p, size); From a0072ba7c335e38bcd20f854692930fcb1d00d5a Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 15:07:15 -0700 Subject: [PATCH 331/352] fix base address if commit fails on aligned overallocation --- src/os.c | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index c720c8ad..580b8af0 100644 --- a/src/os.c +++ b/src/os.c @@ -301,7 +301,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { if (!_mi_os_commit(p, size, NULL)) { - mi_os_prim_free(p, over_size, 0); + mi_os_prim_free(*base, over_size, 0); return NULL; } } From 30a17bf1b773e57fa79c1c96667bf5163a024c02 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 11:36:07 -0700 Subject: [PATCH 332/352] fix missing csize assignment in _mi_os_free_ex --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d134feba..dee263bc 100644 --- a/src/os.c +++ b/src/os.c @@ -181,7 +181,7 @@ static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; - if (csize==0) { _mi_os_good_alloc_size(size); } + if (csize==0) { csize = _mi_os_good_alloc_size(size); } size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? (due to alignment) From 21425bc334ff67d0daafbc1d98056a45f9fab594 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 12:20:02 -0700 Subject: [PATCH 333/352] check all os_commit calls and return NULL on failure --- include/mimalloc/internal.h | 6 +++--- src/os.c | 5 ++++- src/page.c | 26 +++++++++++++++++--------- src/segment.c | 4 +++- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e951b576..5ee59252 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -163,13 +163,13 @@ bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero); -bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); -bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); +mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); +mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/src/os.c b/src/os.c index dee263bc..c720c8ad 100644 --- a/src/os.c +++ b/src/os.c @@ -300,7 +300,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { - _mi_os_commit(p, size, NULL); + if (!_mi_os_commit(p, size, NULL)) { + mi_os_prim_free(p, over_size, 0); + return NULL; + } } } else { // mmap can free inside an allocation diff --git a/src/page.c b/src/page.c index 89acb409..b34ee414 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* 
tld); -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -630,14 +630,14 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - if (page->free != NULL) return; + if (page->free != NULL) return true; #endif - if (page->capacity >= page->reserved) return; + if (page->capacity >= page->reserved) return true; size_t page_size; //uint8_t* page_start = @@ -673,6 +673,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); + return true; } // Initialize a fresh page @@ -724,8 +725,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,tld); - mi_assert(mi_page_immediate_available(page)); + if (mi_page_extend_free(heap,page,tld)) { + mi_assert(mi_page_immediate_available(page)); + } + return; } @@ -817,9 +820,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page_candidate != NULL) { page = page_candidate; } - if (page != NULL && !mi_page_immediate_available(page)) { - mi_assert_internal(mi_page_is_expandable(page)); - mi_page_extend_free(heap, page, heap->tld); + if (page != NULL) { + if (!mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + if (!mi_page_extend_free(heap, page, heap->tld)) { + page = NULL; // failed to extend + } + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); } if (page == NULL) { diff --git a/src/segment.c b/src/segment.c index 708ddd00..1813a1fc 100644 --- a/src/segment.c +++ b/src/segment.c @@ -182,7 +182,9 @@ static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) static void mi_segment_protect_range(void* p, size_t size, bool protect) { if (protect) { - _mi_os_protect(p, size); + if (!_mi_os_protect(p, size)) { + _mi_error_message(EFAULT,"unable to protect segment memory at %p\n", p); + } } else { _mi_os_unprotect(p, size); From 6c3d75a355c14bd3e67c67aed76f0297ab24ed6f Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 15:07:15 -0700 Subject: [PATCH 334/352] fix base address if commit fails on aligned overallocation --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index c720c8ad..580b8af0 100644 --- a/src/os.c +++ b/src/os.c @@ -301,7 +301,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { if (!_mi_os_commit(p, size, NULL)) { - mi_os_prim_free(p, over_size, 0); + mi_os_prim_free(*base, over_size, 0); return NULL; } } From 2d34956bedded440b4aff9d8a53570f5c6a8e2be Mon Sep 17 
00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 15:53:28 -0700 Subject: [PATCH 335/352] use main instead of master in readme --- azure-pipelines.yml | 35 ++--------------------------------- readme.md | 5 ++--- 2 files changed, 4 insertions(+), 36 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b7fc59d4..ea915815 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,10 +6,8 @@ trigger: branches: include: - - master - - dev3 - - dev2 - - dev + - main + - dev* tags: include: - v* @@ -184,35 +182,6 @@ jobs: # Other OS versions (just debug mode) # ---------------------------------------------------------- -- job: - displayName: Windows 2019 - pool: - vmImage: - windows-2019 - strategy: - matrix: - Debug: - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON - MSBuildConfiguration: Debug - Release: - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release - MSBuildConfiguration: Release - steps: - - task: CMake@1 - inputs: - workingDirectory: $(BuildType) - cmakeArgs: .. $(cmakeExtraArgs) - - task: MSBuild@1 - inputs: - solution: $(BuildType)/libmimalloc.sln - configuration: '$(MSBuildConfiguration)' - msbuildArguments: -m - - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration) - workingDirectory: $(BuildType) - displayName: CTest - - job: displayName: Ubuntu 24.04 pool: diff --git a/readme.md b/readme.md index cee78898..601a7e24 100644 --- a/readme.md +++ b/readme.md @@ -72,15 +72,14 @@ Enjoy! ### Branches -* `master`: latest stable release (still based on `dev2`). +* `main`: latest stable release (still based on `dev2`). * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. * `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use less memory - with less fragmentation. + This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. 
### Releases From d389819cc9243c2647684544c2942ef6cc893a2a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 17:59:46 -0700 Subject: [PATCH 336/352] add initial support for _mi_prim_reuse and MADV_FREE_REUSABLE on macOS (issue #1097) --- include/mimalloc/internal.h | 1 + include/mimalloc/prim.h | 5 +++++ src/arena.c | 6 +++++- src/os.c | 11 +++++++++++ src/prim/emscripten/prim.c | 5 +++++ src/prim/unix/prim.c | 33 +++++++++++++++++++++++++++------ src/prim/wasi/prim.c | 5 +++++ src/prim/windows/prim.c | 5 +++++ 8 files changed, 64 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5ee59252..3e57e252 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -167,6 +167,7 @@ bool _mi_os_decommit(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +void _mi_os_reuse(void* p, size_t size); mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 3d8f1806..c71678cc 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -63,6 +63,11 @@ int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); // Returns error code or 0 on success. int _mi_prim_reset(void* addr, size_t size); +// Reuse memory. This is called for memory that is already committed but +// may have been reset (`_mi_prim_reset`) or decommitted (`_mi_prim_decommit`) where `needs_recommit` was false. +// Returns error code or 0 on success. On most platforms this is a no-op. +int _mi_prim_reuse(void* addr, size_t size); + // Protect memory. Returns error code or 0 on success. 
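Aside — a minimal standalone sketch (not part of this patch) of the purge/reuse pairing that `_mi_prim_reuse` enables on macOS: a purged range is marked with MADV_FREE_REUSABLE so its rss is accounted for immediately, and should be marked with MADV_FREE_REUSE again before it is reused; on other platforms both steps are effectively no-ops. The helper names below are illustrative only and are not mimalloc API.

#include <string.h>
#include <sys/mman.h>

static int os_purge_hint(void* p, size_t size) {     // illustrative helper
#if defined(__APPLE__) && defined(MADV_FREE_REUSABLE)
  return madvise(p, size, MADV_FREE_REUSABLE);        // immediate rss accounting on macOS
#else
  return madvise(p, size, MADV_DONTNEED);             // generic purge elsewhere
#endif
}

static int os_reuse_hint(void* p, size_t size) {      // illustrative helper
#if defined(__APPLE__) && defined(MADV_FREE_REUSE)
  return madvise(p, size, MADV_FREE_REUSE);           // pair the earlier MADV_FREE_REUSABLE
#else
  (void)p; (void)size; return 0;                      // no-op on most platforms
#endif
}

int main(void) {
  const size_t size = 1 << 20;
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
  if (p == MAP_FAILED) return 1;
  memset(p, 1, size);        // use the memory
  os_purge_hint(p, size);    // done for now: let the OS reclaim the physical pages
  os_reuse_hint(p, size);    // about to touch it again: restore the accounting first
  memset(p, 2, size);        // safe to use again (contents may have been discarded meanwhile)
  return munmap(p, size);
}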
int _mi_prim_protect(void* addr, size_t size, bool protect); diff --git a/src/arena.c b/src/arena.c index aa01ffcb..25cef886 100644 --- a/src/arena.c +++ b/src/arena.c @@ -266,12 +266,12 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar else if (commit) { // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; + const size_t commit_size = mi_arena_block_size(needed_bcount); bool any_uncommitted; size_t already_committed = 0; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed); if (any_uncommitted) { mi_assert_internal(already_committed < needed_bcount); - const size_t commit_size = mi_arena_block_size(needed_bcount); const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed); bool commit_zero = false; if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) { @@ -281,6 +281,10 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar if (commit_zero) { memid->initially_zero = true; } } } + else { + // all are already committed: signal that we are reusing memory in case it was purged before + _mi_os_reuse( p, commit_size ); + } } else { // no need to commit, but check if already fully committed diff --git a/src/os.c b/src/os.c index 580b8af0..4c99d625 100644 --- a/src/os.c +++ b/src/os.c @@ -512,6 +512,17 @@ bool _mi_os_reset(void* addr, size_t size) { } +void _mi_os_reuse( void* addr, size_t size ) { + // page align conservatively within the range + size_t csize = 0; + void* const start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return; + const int err = _mi_prim_reuse(start, csize); + if (err != 0) { + _mi_warning_message("cannot reuse OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } +} + // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. 
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size) diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index a8677cbc..c4cfc35d 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -114,6 +114,11 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index a90fa659..9ac855a5 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -433,13 +433,27 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { return err; } +int _mi_prim_reuse(void* start, size_t size) { + #if defined(__APPLE__) && defined(MADV_FREE_REUSE) + return unix_madvise(start, size, MADV_FREE_REUSE); + #endif + return 0; +} + int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - err = unix_madvise(start, size, MADV_DONTNEED); #if !MI_DEBUG && MI_SECURE<=2 *needs_recommit = false; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + #else + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #endif #else + // note: don't use MADV_FREE_REUSABLE as the range may contain protected areas + err = unix_madvise(start, size, MADV_DONTNEED); *needs_recommit = true; mprotect(start, size, PROT_NONE); #endif @@ -454,14 +468,21 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { } int _mi_prim_reset(void* start, size_t size) { - // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + int err = 0; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + // on macOS we try to use MADV_FREE_REUSABLE as it seems the fastest + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + if (err == 0) return 0; + // fall through + #endif + + #if defined(MADV_FREE) + // Otherwise, we try to use `MADV_FREE` as that is the fastest. A drawback though is that it // will not reduce the `rss` stats in tools like `top` even though the memory is available // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by // default `MADV_DONTNEED` is used though. 
- #if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on @@ -469,7 +490,7 @@ int _mi_prim_reset(void* start, size_t size) { err = unix_madvise(start, size, MADV_DONTNEED); } #else - int err = unix_madvise(start, size, MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); #endif return err; } diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index e1e7de5e..745a41fd 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -149,6 +149,11 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index b82918c1..df941af9 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -352,6 +352,11 @@ int _mi_prim_reset(void* addr, size_t size) { return (p != NULL ? 0 : (int)GetLastError()); } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { DWORD oldprotect = 0; BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); From 2696627aafef1afc52ead72fe27c3a2a1347b27a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:07:37 -0700 Subject: [PATCH 337/352] add MI_UNUSED for unix _mi_prim_reuse --- src/prim/unix/prim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 9ac855a5..8452b8c2 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -434,6 +434,7 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { } int _mi_prim_reuse(void* start, size_t size) { + MI_UNUSED(start); MI_UNUSED(size); #if defined(__APPLE__) && defined(MADV_FREE_REUSE) return unix_madvise(start, size, MADV_FREE_REUSE); #endif From d7431402c5ef192a5d9c277abdc2fb4640abc4c1 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:15:16 -0700 Subject: [PATCH 338/352] fall back to MADV_DONTNEED if MADV_FREE_REUSABLE fails on macOS; disable use of MADV_FREE_REUSE on a reset (issue #1097) --- src/prim/unix/prim.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 8452b8c2..780d254f 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -443,18 +443,17 @@ int _mi_prim_reuse(void* start, size_t size) { int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; - #if !MI_DEBUG && MI_SECURE<=2 - *needs_recommit = false; - #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) err = unix_madvise(start, size, MADV_FREE_REUSABLE); - #else + if (err) { err = unix_madvise(start, size, MADV_DONTNEED); } + #else // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = unix_madvise(start, size, MADV_DONTNEED); - #endif + #endif + #if !MI_DEBUG && MI_SECURE<=2 + *needs_recommit = false; 
#else - // note: don't use MADV_FREE_REUSABLE as the range may contain protected areas - err = unix_madvise(start, size, MADV_DONTNEED); *needs_recommit = true; mprotect(start, size, PROT_NONE); #endif @@ -470,10 +469,11 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int _mi_prim_reset(void* start, size_t size) { int err = 0; - #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) - // on macOS we try to use MADV_FREE_REUSABLE as it seems the fastest + + // on macOS can use MADV_FREE_REUSABLE (but we disable this for now as it seems slower) + #if 0 && defined(__APPLE__) && defined(MADV_FREE_REUSABLE) err = unix_madvise(start, size, MADV_FREE_REUSABLE); - if (err == 0) return 0; + if (err==0) return 0; // fall through #endif From 2f0540c4f9e57c55cca4e0d621dd8b3c74843ef0 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:50:50 -0700 Subject: [PATCH 339/352] add _mi_os_zalloc --- include/mimalloc/internal.h | 1 + include/mimalloc/prim.h | 2 +- src/arena.c | 7 +----- src/init.c | 28 +++++++++-------------- src/os.c | 45 ++++++++++++++++++++++++++++++------- src/segment-map.c | 2 +- 6 files changed, 51 insertions(+), 34 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 3e57e252..7250d31a 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -154,6 +154,7 @@ void _mi_heap_guarded_init(mi_heap_t* heap); // os.c void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_memid_t* memid); +void* _mi_os_zalloc(size_t size, mi_memid_t* memid); void _mi_os_free(void* p, size_t size, mi_memid_t memid); void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index c71678cc..1087d9b8 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -59,7 +59,7 @@ int _mi_prim_commit(void* addr, size_t size, bool* is_zero); // pre: needs_recommit != NULL int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); -// Reset memory. The range keeps being accessible but the content might be reset. +// Reset memory. The range keeps being accessible but the content might be reset to zero at any moment. // Returns error code or 0 on success. 
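Aside — the idea behind the new `_mi_os_zalloc` can be sketched in isolation (hypothetical names, not mimalloc's internal API): the raw allocation reports whether its memory is already known to be zero (as freshly mapped OS pages are), and the zeroing wrapper clears it only when that is not the case, avoiding a redundant memset.

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

// Illustrative raw allocator: returns memory plus a flag telling the caller
// whether the contents are already zero-initialized.
static void* raw_alloc(size_t size, bool* is_zero) {
  *is_zero = false;                        // e.g. recycled memory: contents unspecified
  return malloc(size);
}

static void* raw_zalloc(size_t size) {
  bool is_zero = false;
  void* p = raw_alloc(size, &is_zero);
  if (p == NULL) return NULL;
  if (!is_zero) { memset(p, 0, size); }    // zero only on demand
  return p;
}

int main(void) {
  int* a = (int*)raw_zalloc(16 * sizeof(int));
  if (a == NULL) return 1;
  int ok = (a[0] == 0 && a[15] == 0);
  free(a);
  return ok ? 0 : 1;
}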
int _mi_prim_reset(void* addr, size_t size); diff --git a/src/arena.c b/src/arena.c index 25cef886..ba36c415 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,14 +188,9 @@ void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid) { if (p != NULL) return p; // or fall back to the OS - p = _mi_os_alloc(size, memid); + p = _mi_os_zalloc(size, memid); if (p == NULL) return NULL; - // zero the OS memory if needed - if (!memid->initially_zero) { - _mi_memzero_aligned(p, size); - memid->initially_zero = true; - } return p; } diff --git a/src/init.c b/src/init.c index ff6c5d29..05ff688e 100644 --- a/src/init.c +++ b/src/init.c @@ -298,7 +298,6 @@ static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; static mi_thread_data_t* mi_thread_data_zalloc(void) { // try to find thread metadata in the cache - bool is_zero = false; mi_thread_data_t* td = NULL; for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); @@ -306,32 +305,25 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { // found cached allocation, try use it td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - break; + _mi_memzero(td, offsetof(mi_thread_data_t,memid)); + return td; } } } // if that fails, allocate as meta data + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid); if (td == NULL) { - mi_memid_t memid; - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid); + // if this fails, try once more. (issue #257) + td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid); if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid); - if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); - } - } - if (td != NULL) { - td->memid = memid; - is_zero = memid.initially_zero; + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + return NULL; } } - - if (td != NULL && !is_zero) { - _mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid)); - } + td->memid = memid; return td; } diff --git a/src/os.c b/src/os.c index 4c99d625..3c25ff59 100644 --- a/src/os.c +++ b/src/os.c @@ -182,6 +182,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; if (csize==0) { csize = _mi_os_good_alloc_size(size); } + mi_assert_internal(csize >= size); size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? 
(due to alignment) @@ -341,9 +342,11 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) { bool os_is_large = false; bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); - if (p != NULL) { - *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); - } + if (p == NULL) return NULL; + + *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); + mi_assert_internal(memid->mem.os.size >= size); + mi_assert_internal(memid->initially_committed); return p; } @@ -359,14 +362,40 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo bool os_is_zero = false; void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base ); - if (p != NULL) { - *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); - memid->mem.os.base = os_base; - memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? - } + if (p == NULL) return NULL; + + *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? + + mi_assert_internal(memid->mem.os.size >= size); + mi_assert_internal(_mi_is_aligned(p,alignment)); + mi_assert_internal(!commit || memid->initially_committed); + mi_assert_internal(!memid->initially_zero || memid->initially_committed); return p; } + +mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) { + if (p==NULL || size==0 || memid->initially_zero) return p; + if (!memid->initially_committed) { + bool is_zero = false; + if (!_mi_os_commit(p, size, &is_zero)) { + _mi_os_free(p, size, *memid); + return NULL; + } + memid->initially_committed = true; + } + _mi_memzero_aligned(p,size); + memid->initially_zero = true; + return p; +} + +void* _mi_os_zalloc(size_t size, mi_memid_t* memid) { + void* p = _mi_os_alloc(size,memid); + return mi_os_ensure_zero(p, size, memid); +} + /* ----------------------------------------------------------- OS aligned allocation with an offset. This is used for large alignments > MI_BLOCK_ALIGNMENT_MAX. 
We use a large mimalloc diff --git a/src/segment-map.c b/src/segment-map.c index 2f68f8c4..bbcea28a 100644 --- a/src/segment-map.c +++ b/src/segment-map.c @@ -61,7 +61,7 @@ static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bo if mi_unlikely(part == NULL) { if (!create_on_demand) return NULL; mi_memid_t memid; - part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid); + part = (mi_segmap_part_t*)_mi_os_zalloc(sizeof(mi_segmap_part_t), &memid); if (part == NULL) return NULL; part->memid = memid; mi_segmap_part_t* expected = NULL; From 57830a4b254673de60900ab83031b5b8454d947a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 21:09:32 -0700 Subject: [PATCH 340/352] fix assertion in mi_os_ensure_zero --- src/os.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index 3c25ff59..028a5ee5 100644 --- a/src/os.c +++ b/src/os.c @@ -370,14 +370,15 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo mi_assert_internal(memid->mem.os.size >= size); mi_assert_internal(_mi_is_aligned(p,alignment)); - mi_assert_internal(!commit || memid->initially_committed); - mi_assert_internal(!memid->initially_zero || memid->initially_committed); + if (commit) { mi_assert_internal(memid->initially_committed); } + if (memid->initially_zero) { mi_assert_internal(memid->initially_committed); } return p; } mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) { - if (p==NULL || size==0 || memid->initially_zero) return p; + if (p==NULL || size==0) return p; + // ensure committed if (!memid->initially_committed) { bool is_zero = false; if (!_mi_os_commit(p, size, &is_zero)) { @@ -386,6 +387,8 @@ mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_ } memid->initially_committed = true; } + // ensure zero'd + if (memid->initially_zero) return p; _mi_memzero_aligned(p,size); memid->initially_zero = true; return p; From d7d6c3b5c3ffe106077625bc21741b522c603f03 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 21:22:44 -0700 Subject: [PATCH 341/352] fix assertion --- src/os.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index 028a5ee5..279b60fb 100644 --- a/src/os.c +++ b/src/os.c @@ -370,8 +370,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo mi_assert_internal(memid->mem.os.size >= size); mi_assert_internal(_mi_is_aligned(p,alignment)); - if (commit) { mi_assert_internal(memid->initially_committed); } - if (memid->initially_zero) { mi_assert_internal(memid->initially_committed); } + if (commit) { mi_assert_internal(memid->initially_committed); } return p; } From e7cbbbfb1417c8c4cc0855f081d1c29a64023ab5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 7 Jun 2025 09:51:35 -0700 Subject: [PATCH 342/352] add mi_process_done to the api --- include/mimalloc.h | 14 +++++++++----- src/init.c | 6 +++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 97cf7856..ce814d18 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -153,17 +153,21 @@ mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; -mi_decl_export void 
mi_options_print(void) mi_attr_noexcept; - -mi_decl_export void mi_process_init(void) mi_attr_noexcept; -mi_decl_export void mi_thread_init(void) mi_attr_noexcept; -mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_options_print(void) mi_attr_noexcept; mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; + +// Generally do not use the following as these are usually called automatically +mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_cdecl mi_process_done(void) mi_attr_noexcept; +mi_decl_export void mi_thread_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_done(void) mi_attr_noexcept; + + // ------------------------------------------------------------------------------------- // Aligned allocation // Note that `alignment` always follows `size` for consistency with unaligned diff --git a/src/init.c b/src/init.c index 05ff688e..eb87ab3c 100644 --- a/src/init.c +++ b/src/init.c @@ -642,7 +642,11 @@ void mi_process_init(void) mi_attr_noexcept { } } -// Called when the process is done (through `at_exit`) +void mi_cdecl mi_process_done(void) mi_attr_noexcept { + _mi_process_done(); +} + +// Called when the process is done (cdecl as it is used with `at_exit` on some platforms) void mi_cdecl _mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; From c1249a4b1502a35d68be51aceddd466a301f5a25 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 7 Jun 2025 10:12:53 -0700 Subject: [PATCH 343/352] do not automatically call mi_process_done if mi_option_destroy_on_exit > 1 --- include/mimalloc/internal.h | 6 +++--- src/init.c | 14 +++++++------- src/prim/prim.c | 12 ++++++------ src/prim/windows/prim.c | 18 +++++++++--------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 7250d31a..a29a419a 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -63,7 +63,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_align(a) -#define mi_decl_noreturn +#define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden #define mi_decl_cold @@ -135,8 +135,8 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; -void _mi_process_load(void); -void mi_cdecl _mi_process_done(void); +void _mi_auto_process_init(void); +void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept; bool _mi_is_redirected(void); bool _mi_allocator_init(const char** message); void _mi_allocator_done(void); diff --git a/src/init.c b/src/init.c index eb87ab3c..0a71ce05 100644 --- a/src/init.c +++ b/src/init.c @@ -323,7 +323,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { return NULL; } } - td->memid = memid; + td->memid = memid; return td; } @@ -555,7 +555,7 @@ mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { } // Called once by the process loader from `src/prim/prim.c` -void _mi_process_load(void) { +void _mi_auto_process_init(void) { mi_heap_main_init(); #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; @@ -642,12 +642,8 @@ void mi_process_init(void) mi_attr_noexcept { } } -void mi_cdecl mi_process_done(void) mi_attr_noexcept { - _mi_process_done(); -} - // Called when the process is done (cdecl as it is used with `at_exit` on some platforms) -void mi_cdecl _mi_process_done(void) { +void mi_cdecl mi_process_done(void) mi_attr_noexcept { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once @@ -690,3 +686,7 @@ void mi_cdecl _mi_process_done(void) { os_preloading = true; // don't call the C runtime anymore } +void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept { + if (_mi_option_get_fast(mi_option_destroy_on_exit)>1) return; + mi_process_done(); +} diff --git a/src/prim/prim.c b/src/prim/prim.c index 2002853f..5147bae8 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -39,29 +39,29 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_destructor __attribute__((destructor)) #endif static void mi_attr_constructor mi_process_attach(void) { - _mi_process_load(); + _mi_auto_process_init(); } static void mi_attr_destructor mi_process_detach(void) { - _mi_process_done(); + _mi_auto_process_done(); } #elif defined(__cplusplus) // C++: use static initialization to detect process start/end // This is not guaranteed to be first/last but the best we can generally do? 
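Aside — a small usage sketch of the now-public process lifecycle API. These calls normally happen automatically through the hooks shown here, and this patch additionally skips the automatic mi_process_done when mi_option_destroy_on_exit is set above 1, so an embedder can drive shutdown explicitly:

#include <mimalloc.h>

int main(void) {
  mi_process_init();     // normally invoked automatically when the library loads
  void* p = mi_malloc(64);
  mi_free(p);
  mi_process_done();     // normally invoked automatically at exit; call explicitly
                         // only when the automatic hook is suppressed or unavailable
  return 0;
}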
struct mi_init_done_t { mi_init_done_t() { - _mi_process_load(); + _mi_auto_process_init(); } ~mi_init_done_t() { - _mi_process_done(); + _mi_auto_process_done(); } }; static mi_init_done_t mi_init_done; #else - #pragma message("define a way to call _mi_process_load/done on your platform") + #pragma message("define a way to call _mi_auto_process_init/done on your platform") #endif #endif -// Generic allocator init/done callback +// Generic allocator init/done callback #ifndef MI_PRIM_HAS_ALLOCATOR_INIT bool _mi_is_redirected(void) { return false; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index df941af9..6752569c 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -633,7 +633,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { //---------------------------------------------------------------- #if MI_WIN_USE_FIXED_TLS==1 -mi_decl_cache_align size_t _mi_win_tls_offset = 0; +mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif //static void mi_debug_out(const char* s) { @@ -654,14 +654,14 @@ static void mi_win_tls_init(DWORD reason) { #endif #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if (mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); - #endif + #endif } - #endif - } + #endif + } } static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { @@ -669,10 +669,10 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(module); mi_win_tls_init(reason); if (reason==DLL_PROCESS_ATTACH) { - _mi_process_load(); + _mi_auto_process_init(); } else if (reason==DLL_PROCESS_DETACH) { - _mi_process_done(); + _mi_auto_process_done(); } else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) { _mi_thread_done(NULL); @@ -684,7 +684,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #define MI_PRIM_HAS_PROCESS_ATTACH 1 // Windows DLL: easy to hook into process_init and thread_done - BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { mi_win_main((PVOID)inst,reason,reserved); return TRUE; } @@ -762,7 +762,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { static int mi_process_attach(void) { mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL); - atexit(&_mi_process_done); + atexit(&_mi_auto_process_done); return 0; } typedef int(*mi_crt_callback_t)(void); From 82cd90083494284691326e13362db61d8d21672f Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 15:54:01 -0700 Subject: [PATCH 344/352] make macOS interposes compile for older macOS versions (by @noxybot, PR #1028) --- src/alloc-override.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index b5109ded..52ab69c5 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -71,24 +71,20 @@ typedef void* mi_nothrow_t; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) - __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = + #define MI_INTERPOSE_DECLS(name) 
__attribute__((used)) static struct mi_interpose_s name[] __attribute__((section("__DATA, __interpose"))) + + MI_INTERPOSE_DECLS(_mi_interposes) = { MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), MI_INTERPOSE_MI(strdup), - #if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7 - MI_INTERPOSE_MI(strndup), - #endif MI_INTERPOSE_MI(realpath), MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_MI(malloc_good_size), - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - MI_INTERPOSE_MI(aligned_alloc), - #endif #ifdef MI_OSX_ZONE // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely MI_INTERPOSE_MI(free), @@ -99,6 +95,12 @@ typedef void* mi_nothrow_t; MI_INTERPOSE_FUN(vfree,mi_cfree), #endif }; + MI_INTERPOSE_DECLS(_mi_interposes_10_7) __OSX_AVAILABLE(10.7) = { + MI_INTERPOSE_MI(strndup), + }; + MI_INTERPOSE_DECLS(_mi_interposes_10_15) __OSX_AVAILABLE(10.15) = { + MI_INTERPOSE_MI(aligned_alloc), + }; #ifdef __cplusplus extern "C" { From a981d40787251ae52c720df4b602df687da93fd2 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 16:02:44 -0700 Subject: [PATCH 345/352] fix link error without static library build (by @fd00, PR #1082) --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3acf83e..353127d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,10 +713,10 @@ if (MI_BUILD_TESTS) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) - if(MI_BUILD_SHARED AND (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) - target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) - else() + if(MI_BUILD_STATIC) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) + else() + target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) endif() add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) endforeach() From 99ed3ea754c31e383fe88da467eb861aff9f7146 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 8 Jun 2025 16:41:04 -0700 Subject: [PATCH 346/352] enable building for xbox, based on pr #1084 by @maxbachmann --- src/prim/windows/prim.c | 48 ++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 6752569c..eebdc4a6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -12,6 +12,10 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc/prim.h" #include // fputs, stderr +// xbox has no console IO +#if !defined(WINAPI_FAMILY_PARTITION) || WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM) +#define MI_HAS_CONSOLE_IO +#endif //--------------------------------------------- // Dynamically bind Windows API points for portability @@ -45,22 +49,30 @@ typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { #define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 #include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef LONG (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); // avoid NTSTATUS as it is not defined on xbox (pr #1084) static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; -// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 +// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 (and GetNumaNodeProcessorMask is not supported on xbox) typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMask)(UCHAR Node, PULONGLONG ProcessorMask); +typedef BOOL (__stdcall* PGetNumaHighestNodeNumber)(PULONG Node); static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; +static PGetNumaNodeProcessorMask pGetNumaNodeProcessorMask = NULL; +static PGetNumaHighestNodeNumber pGetNumaHighestNodeNumber = NULL; + +// Not available on xbox +typedef SIZE_T(__stdcall* PGetLargePageMinimum)(VOID); +static PGetLargePageMinimum pGetLargePageMinimum = NULL; // Available after Windows XP typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes ); @@ -74,6 +86,7 @@ static bool win_enable_large_os_pages(size_t* large_page_size) static bool large_initialized = false; if (large_initialized) return (_mi_os_large_page_size() > 0); large_initialized = true; + if (pGetLargePageMinimum==NULL) return false; // no large page support (xbox etc.) 
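Aside — the dynamic-binding pattern this patch applies throughout can be shown on its own: optional Win32 entry points such as GetLargePageMinimum are resolved at runtime with GetProcAddress, and the code falls back gracefully when the platform (e.g. Xbox) does not export them. A minimal sketch, assuming a Windows build environment:

#include <windows.h>
#include <stdio.h>

// Bind GetLargePageMinimum dynamically instead of linking to it directly,
// since not every Windows-like target exports it.
typedef SIZE_T (__stdcall *PGetLargePageMinimum_t)(VOID);

int main(void) {
  PGetLargePageMinimum_t pGetLargePageMinimum = NULL;
  HMODULE hDll = GetModuleHandleA("kernel32.dll");
  if (hDll != NULL) {
    pGetLargePageMinimum = (PGetLargePageMinimum_t)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum");
  }
  if (pGetLargePageMinimum == NULL) {
    printf("large OS pages are not supported on this platform\n");
    return 0;
  }
  printf("minimum large page size: %zu bytes\n", (size_t)(*pGetLargePageMinimum)());
  return 0;
}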
// Try to see if large OS pages are supported // To use large pages on Windows, we first need access permission @@ -92,8 +105,8 @@ static bool win_enable_large_os_pages(size_t* large_page_size) if (ok) { err = GetLastError(); ok = (err == ERROR_SUCCESS); - if (ok && large_page_size != NULL) { - *large_page_size = GetLargePageMinimum(); + if (ok && large_page_size != NULL && pGetLargePageMinimum != NULL) { + *large_page_size = (*pGetLargePageMinimum)(); } } } @@ -149,6 +162,9 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); + pGetNumaNodeProcessorMask = (PGetNumaNodeProcessorMask)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMask"); + pGetNumaHighestNodeNumber = (PGetNumaHighestNodeNumber)(void (*)(void))GetProcAddress(hDll, "GetNumaHighestNodeNumber"); + pGetLargePageMinimum = (PGetLargePageMinimum)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum"); // Get physical memory (not available on XP, so check dynamically) PGetPhysicallyInstalledSystemMemory pGetPhysicallyInstalledSystemMemory = (PGetPhysicallyInstalledSystemMemory)(void (*)(void))GetProcAddress(hDll,"GetPhysicallyInstalledSystemMemory"); if (pGetPhysicallyInstalledSystemMemory != NULL) { @@ -388,7 +404,7 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num } SIZE_T psize = size; void* base = hint_addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + LONG err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); if (err == 0 && base != NULL) { return base; } @@ -442,9 +458,11 @@ size_t _mi_prim_numa_node(void) { size_t _mi_prim_numa_node_count(void) { ULONG numa_max = 0; - GetNumaHighestNodeNumber(&numa_max); + if (pGetNumaHighestNodeNumber!=NULL) { + (*pGetNumaHighestNodeNumber)(&numa_max); + } // find the highest node number that has actual processors assigned to it. Issue #282 - while(numa_max > 0) { + while (numa_max > 0) { if (pGetNumaNodeProcessorMaskEx != NULL) { // Extended API is supported GROUP_AFFINITY affinity; @@ -455,8 +473,10 @@ size_t _mi_prim_numa_node_count(void) { else { // Vista or earlier, use older API that is limited to 64 processors. 
ULONGLONG mask; - if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node + if (pGetNumaNodeProcessorMask != NULL) { + if ((*pGetNumaNodeProcessorMask)((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + } }; } // max node was invalid or had no processor assigned, try again @@ -546,17 +566,21 @@ void _mi_prim_out_stderr( const char* msg ) if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; - static bool hconIsConsole; + static bool hconIsConsole = false; if (hcon == INVALID_HANDLE_VALUE) { - CONSOLE_SCREEN_BUFFER_INFO sbi; hcon = GetStdHandle(STD_ERROR_HANDLE); + #ifdef MI_HAS_CONSOLE_IO + CONSOLE_SCREEN_BUFFER_INFO sbi; hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); + #endif } const size_t len = _mi_strlen(msg); if (len > 0 && len < UINT32_MAX) { DWORD written = 0; if (hconIsConsole) { + #ifdef MI_HAS_CONSOLE_IO WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + #endif } else if (hcon != INVALID_HANDLE_VALUE) { // use direct write if stderr was redirected From 3b2daccf9d1d50ba3a58375f6e9ff0733d1a0c9a Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 16:50:29 -0700 Subject: [PATCH 347/352] fix build for TSAN tests --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 353127d5..c58e64f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,10 +713,12 @@ if (MI_BUILD_TESTS) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) - if(MI_BUILD_STATIC) + if(MI_BUILD_STATIC AND NOT MI_DEBUG_TSAN) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) - else() + elseif(MI_BUILD_SHARED) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) + else() + message(STATUS "cannot build TSAN tests without MI_BUILD_SHARED being enabled") endif() add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) endforeach() From 316a434d8e282656bcad243b6d794fc7feb07038 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:05:30 -0700 Subject: [PATCH 348/352] fix armv7 detection --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c58e64f8..5ce084f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,7 +126,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENE set(MI_ARCH "x64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) set(MI_ARCH "arm64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567].?|ARM)$") set(MI_ARCH "arm32") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") if(CMAKE_SIZEOF_VOID_P==4) From 82b67862c8763040ee90a724e78bf4738e50eb34 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:18:51 -0700 Subject: [PATCH 349/352] update vcpkg config to 1.9.4 --- contrib/vcpkg/portfile.cmake | 5 +++-- contrib/vcpkg/vcpkg.json | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake 
b/contrib/vcpkg/portfile.cmake index 69661526..b59c3675 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -4,8 +4,8 @@ vcpkg_from_github( HEAD_REF master # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). - # REF "v${VERSION}" - REF 866ce5b89db1dbc3e66bbf89041291fd16329518 + REF "v${VERSION}" + # REF 866ce5b89db1dbc3e66bbf89041291fd16329518 # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) @@ -19,6 +19,7 @@ vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS secure MI_SECURE override MI_OVERRIDE optarch MI_OPT_ARCH + nooptarch MI_NO_OPT_ARCH optsimd MI_OPT_SIMD xmalloc MI_XMALLOC asm MI_SEE_ASM diff --git a/contrib/vcpkg/vcpkg.json b/contrib/vcpkg/vcpkg.json index 45f8097b..b38555a1 100644 --- a/contrib/vcpkg/vcpkg.json +++ b/contrib/vcpkg/vcpkg.json @@ -1,6 +1,6 @@ { "name": "mimalloc", - "version": "1.9.2", + "version": "1.9.4", "port-version": 2, "description": "Compact general purpose allocator with excellent performance", "homepage": "https://github.com/microsoft/mimalloc", @@ -35,6 +35,9 @@ "optarch": { "description": "Use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" }, + "nooptarch": { + "description": "Do _not_ use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" + }, "optsimd": { "description": "Allow use of SIMD instructions (avx2 or neon) (requires 'optarch' to be enabled)" }, From 6d3c8607699da8375ec9985aaacdb1fa8ea6ea4d Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:19:18 -0700 Subject: [PATCH 350/352] update readme for upcoming release --- readme.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/readme.md b/readme.md index 601a7e24..71aaf7a2 100644 --- a/readme.md +++ b/readme.md @@ -12,9 +12,9 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release : `v3.0.3` (beta) (2025-03-28). -Latest v2 release: `v2.2.3` (2025-03-28). -Latest v1 release: `v1.9.3` (2024-03-28). +Latest release : `v3.1.4` (beta) (2025-06-09). +Latest v2 release: `v2.2.4` (2025-06-09). +Latest v1 release: `v1.9.4` (2024-06-09). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -77,12 +77,16 @@ Enjoy! * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. -* `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version - simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. +* `dev3`: development branch for mimalloc v3 beta. This branch is downstream of `dev`. This version + simplifies the lock-free ownership of previous versions, and improves sharing of memory between + threads. On certain large workloads this version may use (much) less memory. 
### Releases +* 2025-06-09, `v1.9.4`, `v2.2.4`, `v3.1.4` (beta) : Some important bug fixes, including a case where OS memory + was not always fully released. Improved v3 performance, build on XBox, fix build on Android, support interpose + for older macOS versions, use MADV_FREE_REUSABLE on macOS, always check commit success, better support for Windows + fixed TLS offset, etc. * 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. From cfff6bfd47bd491145364b210d6552ea2c42444d Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 20:01:28 -0700 Subject: [PATCH 351/352] bump version to v1.9.5 for further development --- cmake/mimalloc-config-version.cmake | 2 +- contrib/vcpkg/portfile.cmake | 2 +- include/mimalloc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 0446485b..1057b5c0 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 9) -set(mi_version_patch 4) +set(mi_version_patch 5) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index b59c3675..a13b57c3 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -9,7 +9,7 @@ vcpkg_from_github( # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) - SHA512 0b0e5ff823c49b9534b8c32800679806c5d7c29020af058da043c3e6e36ae3c32a1cdd5a21ece97dd60bc7dd4703967f683beac435dbb8514638a6cc55e5dea8 + SHA512 fb5aa8c2e6c15e5d22746ee40ed196f2fb7eafec9abfcbf94e7e70854734e99dd09886e1d68374fb995fe597e158100aa89260579e34cf5d9fb75d501b186d6a ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS diff --git a/include/mimalloc.h b/include/mimalloc.h index ce814d18..7bde743f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 194 // major + 2 digits minor +#define MI_MALLOC_VERSION 195 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From d21e2b8dd350356e933e08df1541dc45ea2f8719 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 13 Jun 2025 22:18:51 -0700 Subject: [PATCH 352/352] Update readme.md for v3.1.5 --- readme.md | 89 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/readme.md b/readme.md index 71aaf7a2..ddf358b2 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,7 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release : `v3.1.4` (beta) (2025-06-09). +Latest release : `v3.1.5` (beta) (2025-06-13). Latest v2 release: `v2.2.4` (2025-06-09). Latest v1 release: `v1.9.4` (2024-06-09). @@ -83,6 +83,7 @@ Enjoy! ### Releases +* 2025-06-13, `v3.1.5`: Bug fix release where memory was not always correctly committed (issue #1098). 
* 2025-06-09, `v1.9.4`, `v2.2.4`, `v3.1.4` (beta) : Some important bug fixes, including a case where OS memory was not always fully released. Improved v3 performance, build on XBox, fix build on Android, support interpose for older macOS versions, use MADV_FREE_REUSABLE on macOS, always check commit success, better support for Windows @@ -103,53 +104,13 @@ Enjoy! add 0-byte to canary; upstream CPython fixes; reduce .bss size; allow fixed TLS slot on Windows for improved performance. * 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches. -* 2024-05-13, `v1.8.6`, `v2.1.6`: Fix build errors on various (older) platforms. Refactored aligned allocation. -* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. - Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size - directly available (and new `block_size_shift` to improve aligned block free-ing). - New approach to collection of abandoned segments: When - a thread terminates the segments it owns are abandoned (containing still live objects) and these can be - reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arena's - which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in - an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim` - gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%). - -* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity - by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory - usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. - -* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. - -* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision - with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS - abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. - -* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. - Support arbitrary large alignments (in particular for `std::pmr` pools). - Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). - Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). - Various small bug fixes. - -* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow - detection. Initial - support for attaching heaps to a specific memory area (only in v2). 
Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . - -* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation - even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix - warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object - allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes. - -* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on - Windows 11, fix compilation with musl, potentially reduced - committed memory, add `bin/minject` for Windows, - improved wasm support, faster aligned allocation, - various small fixes. * [Older release notes](#older-release-notes) Special thanks to: -* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his many contributions, and making +* Sergiy Kuryata for his contributions on reducing memory commit -- especially on Windows with the Windows thread pool (now implemented in v3). +* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his _many_ contributions, and making mimalloc work better on many less common operating systems, like Haiku, Dragonfly, etc. * Mary Feofanova (@mary3000), Evgeniy Moiseenko, and Manuel Pöter (@mpoeter) for making mimalloc TSAN checkable, and finding memory model bugs using the [genMC] model checker. @@ -904,6 +865,48 @@ provided by the bot. You will only need to do this once across all repos using o # Older Release Notes +* 2024-05-13, `v1.8.6`, `v2.1.6`: Fix build errors on various (older) platforms. Refactored aligned allocation. +* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. + Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size + directly available (and new `block_size_shift` to improve aligned block free-ing). + New approach to collection of abandoned segments: When + a thread terminates the segments it owns are abandoned (containing still live objects) and these can be + reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arena's + which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in + an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim` + gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%). + +* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity + by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory + usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. + +* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. + +* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision + with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). 
Created an OS + abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. + +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. + Support arbitrary large alignments (in particular for `std::pmr` pools). + Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). + Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). + Various small bug fixes. + +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow + detection. Initial + support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . + +* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation + even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix + warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object + allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes. + +* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on + Windows 11, fix compilation with musl, potentially reduced + committed memory, add `bin/minject` for Windows, + improved wasm support, faster aligned allocation, + various small fixes. + * 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including M1), improved performance for v2 for large objects, Python integration improvements, more standard installation directories, various small fixes.