diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj
index 7d452b55..d9bce9c0 100644
--- a/ide/vs2017/mimalloc-override.vcxproj
+++ b/ide/vs2017/mimalloc-override.vcxproj
@@ -95,7 +95,7 @@
  [note: the XML element tags in these vcxproj hunks were stripped in extraction;
   only the element text survives as context]
   true true ../../include
-  MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);
+  MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);
   MultiThreadedDebugDLL false Default
@@ -118,7 +118,7 @@
   true true ../../include
-  MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);
+  MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);
   MultiThreadedDebugDLL false Default
@@ -225,7 +225,6 @@
-
diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters
index df0bf5ed..639a9d4e 100644
--- a/ide/vs2017/mimalloc-override.vcxproj.filters
+++ b/ide/vs2017/mimalloc-override.vcxproj.filters
@@ -58,9 +58,6 @@
   Source Files
-
-  Source Files
-
   Source Files
diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj
index 3e453471..5b151da7 100644
--- a/ide/vs2017/mimalloc.vcxproj
+++ b/ide/vs2017/mimalloc.vcxproj
@@ -227,7 +227,6 @@
-
   true
diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters
index 28d94e99..d32080f5 100644
--- a/ide/vs2017/mimalloc.vcxproj.filters
+++ b/ide/vs2017/mimalloc.vcxproj.filters
@@ -47,9 +47,6 @@
   Source Files
-
-  Source Files
-
   Source Files
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index f6f2e2ae..f17d8af0 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -39,10 +39,20 @@ bool _mi_preloading();  // true while the C runtime is not ready
 // os.c
 size_t _mi_os_page_size(void);
+size_t _mi_os_large_page_size();
 void  _mi_os_init(void);                                    // called from process init
 void* _mi_os_alloc(size_t size, mi_stats_t* stats);         // to allocate thread local data
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
+bool  _mi_os_protect(void* addr, size_t size);
+bool  _mi_os_unprotect(void* addr, size_t size);
+bool  _mi_os_commit(void* p, size_t size, mi_stats_t* stats);
+bool  _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
+bool  _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
+bool  _mi_os_unreset(void* p, size_t size, mi_stats_t* stats);
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld);
+
+/*
 // memory.c
 void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld);
 void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld);
@@ -55,6 +65,7 @@
 bool _mi_mem_protect(void* addr, size_t size);
 bool _mi_mem_unprotect(void* addr, size_t size);
 void _mi_mem_collect(mi_stats_t* stats);
+*/

 // "segment.c"
 mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
@@ -62,7 +73,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t*
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
 bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
-uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page
+uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page

 // "page.c"
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
@@ -233,27 +244,47 @@ static inline mi_segment_t* _mi_ptr_segment(const void* p) {
   return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK);
 }

+static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
+  mi_assert_internal(s->slice_offset == 0 && s->slice_count > 0);
+  return (mi_page_t*)(s);
+}
+
+static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
+  mi_assert_internal(p->slice_offset == 0 && p->slice_count > 0);
+  return (mi_slice_t*)(p);
+}
+
+static size_t mi_slice_index(const mi_slice_t* slice) {
+  mi_segment_t* segment = _mi_ptr_segment(slice);
+  ptrdiff_t index = slice - segment->slices;
+  mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_count);
+  return index;
+}
+
 // Segment belonging to a page
 static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
   mi_segment_t* segment = _mi_ptr_segment(page);
-  mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]);
+  mi_assert_internal(segment == NULL || page == mi_slice_to_page(&segment->slices[mi_slice_index(mi_page_to_slice((mi_page_t*)page))]));
   return segment;
 }

 // Get the page containing the pointer
 static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
-  // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
   ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
   mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE);
-  uintptr_t idx = (uintptr_t)diff >> segment->page_shift;
-  mi_assert_internal(idx < segment->capacity);
-  mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);
-  return &((mi_segment_t*)segment)->pages[idx];
+  uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT;
+  mi_assert_internal(idx < segment->slice_count);
+  mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
+  mi_slice_t* slice = slice0 - slice0->slice_offset; // adjust to the block that holds the page data
+  mi_assert_internal(slice->slice_count > slice0->slice_offset);
+  mi_assert_internal(slice->slice_offset == 0);
+  mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_count);
+  return mi_slice_to_page(slice);
 }

 // Quick page start for initialized pages
 static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
-  return _mi_segment_page_start(segment, page, page->block_size, page_size);
+  return _mi_segment_page_start(segment, page, page_size);
 }

 // Get the page containing the pointer
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 67ad8516..7e14daca 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -74,27 +74,28 @@ terms of the MIT license. A copy of the license can be found in the file

 // Main tuning parameters for segment and page sizes
 // Sizes for 64-bit, divide by two for 32-bit
-#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb
-#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb
-#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4mb
-#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb
+#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb
+#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64mb
+
+#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64kb
+#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SEGMENT_SLICE_SHIFT) // 512kb
+
 // Derived constants
-#define MI_SEGMENT_SIZE (1<<MI_SEGMENT_SHIFT)
 [the remaining derived constants were garbled by angle-bracket stripping in
  extraction; the old per-page-kind sizes give way to the slice-based
  MI_SEGMENT_SLICE_SIZE, MI_SLICES_PER_SEGMENT, MI_SMALL/MEDIUM/LARGE_SIZE_MAX
  and MI_MEDIUM/LARGE_WSIZE_MAX used throughout this patch; the surviving tail
  of the span is ...>>MI_INTPTR_SHIFT)]
-#define MI_HUGE_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)

 // Minimal alignment necessary. On most platforms 16 bytes are needed
 // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
@@ -103,7 +104,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Maximum number of size classes. (spaced exponentially in 12.5% increments)
 #define MI_BIN_HUGE (73U)
-#if (MI_LARGE_WSIZE_MAX >= 655360)
+#if (MI_MEDIUM_WSIZE_MAX >= 655360)
 #error "define more bins"
 #endif
@@ -154,20 +155,20 @@ typedef uintptr_t mi_thread_free_t;
 // - using `uint16_t` does not seem to slow things down
 typedef struct mi_page_s {
   // "owned" by the segment
-  uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
-  bool segment_in_use:1; // `true` if the segment allocated this page
-  bool is_reset:1; // `true` if the page memory was reset
-  bool is_committed:1; // `true` if the page virtual memory is committed
+  size_t slice_count; // slices in this page (0 if not a page)
+  uint16_t slice_offset; // distance from the actual page data slice (0 if a page)
+  bool is_reset; // `true` if the page memory was reset
+  bool is_committed; // `true` if the page virtual memory is committed

   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t capacity; // number of blocks committed
   uint16_t reserved; // number of blocks reserved in memory
-  // 16 bits padding
+
   mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
   #if MI_SECURE
   uintptr_t cookie; // random cookie to encode the free lists
   #endif
-  mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1
+  mi_page_flags_t flags;
   size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)

   mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
@@ -182,7 +183,7 @@ typedef struct mi_page_s {
   // improve page index calculation
   #if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  void* padding[1]; // 12 words on 64-bit
+  // void* padding[1]; // 12 words on 64-bit
   #elif MI_INTPTR_SIZE==4
   // void* padding[1]; // 12 words on 32-bit
   #endif
@@ -193,30 +194,37 @@ typedef struct mi_page_s {
 typedef enum mi_page_kind_e {
   MI_PAGE_SMALL, // small blocks go into 64kb pages inside a segment
   MI_PAGE_MEDIUM, // medium blocks go into 512kb pages inside a segment
-  MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment
-  MI_PAGE_HUGE // huge blocks (>512kb) are put into a single page in a segment of the exact size (but still 2mb aligned)
+  MI_PAGE_LARGE, // larger blocks go into a page of just one block
+  MI_PAGE_HUGE, // huge blocks (>16mb) are put into a single page in a single segment.
} mi_page_kind_t; +typedef enum mi_segment_kind_e { + MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. + MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. +} mi_segment_kind_t; + +typedef mi_page_t mi_slice_t; + // Segments are large allocated memory blocks (2mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { struct mi_segment_s* next; struct mi_segment_s* prev; - struct mi_segment_s* abandoned_next; + struct mi_segment_s* abandoned_next; // abandoned segment stack: `used == abandoned` size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t used; // count of pages in use (`used <= capacity`) - size_t capacity; // count of available pages (`#free + used`) + size_t used; // count of pages in use size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` size_t memid; // id for the os-level memory manager + bool all_committed; // layout like this to optimize access in `mi_free` - size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). - volatile uintptr_t thread_id; // unique id of the thread owning this segment - mi_page_kind_t page_kind; // kind of pages: small, large, or huge - mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages + mi_segment_kind_t kind; + uintptr_t thread_id; + size_t slice_count; // slices in this segment (at most MI_SLICES_PER_SEGMENT) + mi_slice_t slices[MI_SLICES_PER_SEGMENT]; } mi_segment_t; @@ -326,13 +334,13 @@ typedef struct mi_stats_s { mi_stat_count_t commit_calls; mi_stat_count_t threads; mi_stat_count_t huge; - mi_stat_count_t giant; + mi_stat_count_t large; mi_stat_count_t malloc; mi_stat_count_t segments_cache; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t huge_count; - mi_stat_counter_t giant_count; + mi_stat_counter_t large_count; #if MI_STAT>1 mi_stat_count_t normal[MI_BIN_HUGE+1]; #endif @@ -367,11 +375,11 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SEGMENT_SIZE) // Segments thread local data typedef struct mi_segments_tld_s { - mi_segment_queue_t small_free; // queue of segments with free small pages - mi_segment_queue_t medium_free; // queue of segments with free medium pages + mi_page_queue_t pages[MI_SEGMENT_BIN_MAX+1]; // free pages inside segments size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 24f6c440..c605d637 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -43,7 +43,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* heap, size_t size, size_t if (p == NULL) return NULL; // .. and align within the allocation - mi_page_set_has_aligned( _mi_ptr_page(p), true ); + mi_page_set_has_aligned(_mi_ptr_page(p), true); uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment); mi_assert_internal(adjust % sizeof(uintptr_t) == 0); void* aligned_p = (adjust == alignment ? 
p : (void*)((uintptr_t)p + adjust)); diff --git a/src/alloc.c b/src/alloc.c index bbe504a1..b5a48bde 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -226,7 +226,7 @@ void mi_free(void* p) mi_attr_noexcept #endif mi_page_t* const page = _mi_segment_page_of(segment, p); - + #if (MI_STAT>1) mi_heap_t* heap = mi_heap_get_default(); mi_heap_stat_decrease( heap, malloc, mi_usable_size(p)); @@ -235,9 +235,9 @@ void mi_free(void* p) mi_attr_noexcept } // huge page stat is accounted for in `_mi_page_retire` #endif - - const uintptr_t tid = _mi_thread_id(); - if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks + + uintptr_t tid = _mi_thread_id(); + if (mi_likely(page->flags == tid)) { // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; mi_block_set_next(page, block, page->local_free); diff --git a/src/heap.c b/src/heap.c index 768cab96..7b5d7a07 100644 --- a/src/heap.c +++ b/src/heap.c @@ -150,7 +150,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + // _mi_mem_collect(&heap->tld->stats); } } @@ -245,9 +245,9 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); // stats - if (page->block_size > MI_LARGE_SIZE_MAX) { - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + if (page->block_size > MI_MEDIUM_SIZE_MAX) { + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.large,page->block_size); } else { _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); @@ -255,7 +255,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ } #if (MI_STAT>1) size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_LARGE_SIZE_MAX) { + if (page->block_size <= MI_MEDIUM_SIZE_MAX) { mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); } mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... 
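
[Note on the reshuffled size classes in the heap.c hunk above: blocks up to MI_MEDIUM_SIZE_MAX stay in the regular heap bins, blocks in (MI_MEDIUM_SIZE_MAX, MI_LARGE_SIZE_MAX] are now accounted as `large`, and only blocks beyond MI_LARGE_SIZE_MAX count as `huge`; the old `giant` stat bucket is renamed to `large`. A minimal standalone sketch of that classification follows. The numeric values are assumptions for 64-bit inferred from the comments in the mimalloc-types.h hunk (512kb medium pages, 64mb segments, huge meaning >16mb), not definitions taken verbatim from this patch.]

#include <stdio.h>

// Assumed 64-bit values, for illustration only (the real ones are derived
// from the page/segment shifts in mimalloc-types.h):
#define MI_MEDIUM_SIZE_MAX  (128*1024)      // largest size served from the bins
#define MI_LARGE_SIZE_MAX   (16*1024*1024)  // largest size served by a "large" page

static const char* mi_size_class(size_t size) {
  if (size <= MI_MEDIUM_SIZE_MAX) return "binned (small/medium page)";
  if (size <= MI_LARGE_SIZE_MAX)  return "large (one page, one block)";
  return "huge (dedicated segment)";
}

int main(void) {
  size_t sizes[] = { 64, 8*1024, 512*1024, 32*1024*1024 };
  for (int i = 0; i < 4; i++) {
    printf("%8zu bytes -> %s\n", sizes[i], mi_size_class(sizes[i]));
  }
  return 0;
}
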
diff --git a/src/init.c b/src/init.c index 77ce4aad..ff0fa76c 100644 --- a/src/init.c +++ b/src/init.c @@ -21,7 +21,7 @@ const mi_page_t _mi_page_empty = { NULL, 0, 0, 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && MI_SECURE==0) - , { NULL } + // , { NULL } #endif }; @@ -43,8 +43,8 @@ const mi_page_t _mi_page_empty = { QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ - QNULL(MI_LARGE_WSIZE_MAX + 1 /* 655360, Huge queue */), \ - QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ } + QNULL(MI_MEDIUM_WSIZE_MAX + 1 /* 655360, Huge queue */), \ + QNULL(MI_MEDIUM_WSIZE_MAX + 2) /* Full queue */ } #define MI_STAT_COUNT_NULL() {0,0,0,0} @@ -91,14 +91,23 @@ const mi_heap_t _mi_heap_empty = { mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; +// Empty page queues for every bin +#define MI_SEGMENT_PAGE_QUEUES_EMPTY \ + { QNULL(0), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ + QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ + QNULL( 160), QNULL( 192), QNULL( 224), /* 27 */ } + + #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) static mi_tld_t tld_main = { 0, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { MI_SEGMENT_PAGE_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { diff --git a/src/page-queue.c b/src/page-queue.c index e59620c2..f396e233 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -34,15 +34,15 @@ terms of the MIT license. 
A copy of the license can be found in the file static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_LARGE_SIZE_MAX+sizeof(uintptr_t))); + return (pq->block_size == (MI_MEDIUM_SIZE_MAX+sizeof(uintptr_t))); } static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_LARGE_SIZE_MAX+(2*sizeof(uintptr_t)))); + return (pq->block_size == (MI_MEDIUM_SIZE_MAX+(2*sizeof(uintptr_t)))); } static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { - return (pq->block_size > MI_LARGE_SIZE_MAX); + return (pq->block_size > MI_MEDIUM_SIZE_MAX); } /* ----------------------------------------------------------- @@ -116,7 +116,7 @@ extern inline uint8_t _mi_bin(size_t size) { bin = (uint8_t)wsize; } #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { + else if (wsize > MI_MEDIUM_WSIZE_MAX) { bin = MI_BIN_HUGE; } else { @@ -147,7 +147,7 @@ size_t _mi_bin_size(uint8_t bin) { // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { - if (size <= MI_LARGE_SIZE_MAX) { + if (size <= MI_MEDIUM_SIZE_MAX) { return _mi_bin_size(_mi_bin(size)); } else { @@ -245,7 +245,7 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; @@ -268,7 +268,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ mi_assert_internal(page->heap == NULL); mi_assert_internal(!mi_page_queue_contains(queue, page)); mi_assert_internal(page->block_size == queue->block_size || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); @@ -297,8 +297,8 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || (page->block_size == to->block_size && mi_page_queue_is_full(from)) || (page->block_size == from->block_size && mi_page_queue_is_full(to)) || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_full(to))); + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->block_size > MI_MEDIUM_SIZE_MAX && mi_page_queue_is_full(to))); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; diff --git a/src/page.c b/src/page.c index 9d645b6c..b1fd1e69 100644 --- a/src/page.c +++ b/src/page.c @@ -74,7 +74,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == 
_mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); @@ -102,7 +102,7 @@ bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_MEDIUM_SIZE_MAX || mi_page_is_in_full(page)); mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); } return true; @@ -356,9 +356,9 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_page_set_has_aligned(page, false); // account for huge pages here - if (page->block_size > MI_LARGE_SIZE_MAX) { - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); + if (page->block_size > MI_MEDIUM_SIZE_MAX) { + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_decrease(&page->heap->tld->stats.large, page->block_size); } else { _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); @@ -554,7 +554,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size); + _mi_segment_page_start(segment, page, &page_size); page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); @@ -702,7 +702,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { ----------------------------------------------------------- */ // A huge page is allocated directly without being in a queue -static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { +static mi_page_t* mi_large_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t); mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_queue_t* pq = mi_page_queue(heap,block_size); @@ -711,9 +711,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(page->block_size == block_size); - if (page->block_size > MI_HUGE_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.giant, block_size); - _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); + if (page->block_size <= MI_LARGE_SIZE_MAX) { + _mi_stat_increase(&heap->tld->stats.large, block_size); + _mi_stat_counter_increase(&heap->tld->stats.large_count, 1); } else { _mi_stat_increase(&heap->tld->stats.huge, block_size); @@ -744,12 +744,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // huge allocation? 
   mi_page_t* page;
-  if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {
+  if (mi_unlikely(size > MI_MEDIUM_SIZE_MAX)) {
     if (mi_unlikely(size >= (SIZE_MAX - MI_MAX_ALIGN_SIZE))) {
       page = NULL;
     }
     else {
-      page = mi_huge_page_alloc(heap,size);
+      page = mi_large_page_alloc(heap,size);
     }
   }
   else {
diff --git a/src/segment.c b/src/segment.c
index 736345bf..31117857 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -15,16 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file
 /* -----------------------------------------------------------
   Segment allocation
-  We allocate pages inside big OS allocated "segments"
-  (4mb on 64-bit). This is to avoid splitting VMA's on Linux
-  and reduce fragmentation on other OS's. Each thread
-  owns its own segments.
-
-  Currently we have:
-  - small pages (64kb), 64 in one segment
-  - medium pages (512kb), 8 in one segment
-  - large pages (4mb), 1 in one segment
-  - huge blocks > MI_LARGE_SIZE_MAX (512kb) are directly allocated by the OS
+
   In any case the memory for a segment is virtual and only
   committed on demand (i.e. we are careful to not touch the memory
@@ -35,75 +26,103 @@ terms of the MIT license. A copy of the license can be found in the file
   be reclaimed by still running threads, much like work-stealing.
----------------------------------------------------------- */

 /* -----------------------------------------------------------
-  Queue of segments containing free pages
+  Bins
----------------------------------------------------------- */
-
-
-#if (MI_DEBUG>1)
-static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) {
-  mi_assert_internal(segment != NULL);
-  mi_segment_t* list = queue->first;
-  while (list != NULL) {
-    if (list == segment) break;
-    mi_assert_internal(list->next==NULL || list->next->prev == list);
-    mi_assert_internal(list->prev==NULL || list->prev->next == list);
-    list = list->next;
-  }
-  return (list == segment);
+// Use bit scan reverse (bsr) to quickly find the highest set bit, if available
+#if defined(_MSC_VER)
+#include <intrin.h>
+static inline size_t mi_bsr(uintptr_t x) {
+  if (x==0) return 8*MI_INTPTR_SIZE;
+  DWORD idx;
+  #if (MI_INTPTR_SIZE==8)
+  _BitScanReverse64(&idx, x);
+  #else
+  _BitScanReverse(&idx, x);
+  #endif
+  return idx;
 }
+#elif defined(__GNUC__) || defined(__clang__)
+static inline size_t mi_bsr(uintptr_t x) {
+  return (x==0 ?
8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); +} +#else +#error "define bsr for your platform" #endif -static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) { - return (queue->first == NULL); +static size_t mi_slice_bin4(size_t slice_count) { + if (slice_count==0) return 0; + mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + size_t s = mi_bsr(slice_count); + if (s <= 1) return slice_count; + size_t bin = ((s << 1) | (slice_count >> (s - 1))&0x01); + return bin; } -static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) { - mi_assert_expensive(mi_segment_queue_contains(queue, segment)); - if (segment->prev != NULL) segment->prev->next = segment->next; - if (segment->next != NULL) segment->next->prev = segment->prev; - if (segment == queue->first) queue->first = segment->next; - if (segment == queue->last) queue->last = segment->prev; - segment->next = NULL; - segment->prev = NULL; +static size_t mi_slice_bin8(size_t slice_count) { + if (slice_count==0) return 0; + mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + size_t s = mi_bsr(slice_count); + if (s <= 2) return slice_count; + size_t bin = ((s << 2) | (slice_count >> (s - 2))&0x03) - 5; + return bin; } -static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) { - mi_assert_expensive(!mi_segment_queue_contains(queue, segment)); - segment->next = NULL; - segment->prev = queue->last; - if (queue->last != NULL) { - mi_assert_internal(queue->last->next == NULL); - queue->last->next = segment; - queue->last = segment; - } - else { - queue->last = queue->first = segment; - } +static size_t mi_slice_bin(size_t slice_count) { + mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); + mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) == MI_SEGMENT_BIN_MAX); + size_t bin = (slice_count==0 ? 0 : mi_slice_bin8(slice_count)); + mi_assert_internal(bin >= 0 && bin <= MI_SEGMENT_BIN_MAX); + return bin; } -static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi_segments_tld_t* tld) { - if (kind == MI_PAGE_SMALL) return &tld->small_free; - else if (kind == MI_PAGE_MEDIUM) return &tld->medium_free; - else return NULL; + +/* ----------------------------------------------------------- + Page Queues +----------------------------------------------------------- */ +static bool mi_page_queue_is_empty(mi_page_queue_t* pq) { + return (pq->first == NULL); } -static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - return mi_segment_free_queue_of_kind(segment->page_kind, tld); +static mi_page_t* mi_page_queue_pop(mi_page_queue_t* pq) +{ + mi_page_t* page = pq->first; + if (page==NULL) return NULL; + mi_assert_internal(page->prev==NULL); + pq->first = page->next; + if (page->next == NULL) pq->last = NULL; + else page->next->prev = NULL; + page->next = NULL; + page->prev = NULL; // paranoia + page->block_size = 1; // no more free + return page; } -// remove from free queue if it is in one -static void mi_segment_remove_from_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); // may be NULL - bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); - if (in_queue) { - mi_segment_queue_remove(queue, segment); - } +static void mi_page_queue_push(mi_page_queue_t* pq, mi_page_t* page) { + // todo: or push to the end? 
+ mi_assert_internal(page->prev == NULL && page->next==NULL); + page->prev = NULL; // paranoia + page->next = pq->first; + pq->first = page; + if (page->next != NULL) page->next->prev = page; + else pq->last = page; + page->block_size = 0; // free } -static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_enqueue(mi_segment_free_queue(segment, tld), segment); +static mi_page_queue_t* mi_page_queue_for(size_t slice_count, mi_segments_tld_t* tld) { + size_t bin = mi_slice_bin(slice_count); + return &tld->pages[bin]; +} + +static void mi_page_queue_remove(mi_page_queue_t* pq, mi_page_t* page) { + mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); + if (page->prev != NULL) page->prev->next = page->next; + else pq->first = page->next; + if (page->next != NULL) page->next->prev = page->prev; + else pq->last = page->prev; + page->prev = NULL; + page->next = NULL; + page->block_size = 1; // no more free } @@ -112,31 +131,47 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t ----------------------------------------------------------- */ #if (MI_DEBUG > 1) -static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); - bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); - if (in_queue) { - mi_assert_expensive(mi_segment_queue_contains(queue, segment)); +static bool mi_page_queue_contains(mi_page_queue_t* pq, mi_page_t* page) { + for (mi_page_t* p = pq->first; p != NULL; p = p->next) { + if (p==page) return true; } - return in_queue; + return false; } -static size_t mi_segment_pagesize(mi_segment_t* segment) { - return ((size_t)1 << segment->page_shift); -} -static bool mi_segment_is_valid(mi_segment_t* segment) { +static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->abandoned <= segment->used); - size_t nfree = 0; - for (size_t i = 0; i < segment->capacity; i++) { - if (!segment->pages[i].segment_in_use) nfree++; + mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); + //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); + mi_slice_t* slice = &segment->slices[0]; + size_t page_count = 0; + mi_page_queue_t* pq; + while(slice < &segment->slices[segment->slice_count]) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (slice->block_size > 0) { // a page in use, all slices need their back offset set + page_count++; + for (size_t i = 0; i < slice->slice_count; i++) { + mi_assert_internal((slice+i)->slice_offset == i); + mi_assert_internal(i==0 || (slice+i)->slice_count == 0); + mi_assert_internal(i==0 || (slice+i)->block_size == 1); + } + } + else { // free range of slices; only last slice needs a valid back offset + mi_slice_t* end = slice + slice->slice_count - 1; + mi_assert_internal(slice == end - end->slice_offset); + mi_assert_internal(slice == end || end->slice_count == 0 ); + mi_assert_internal(end->block_size == 0); + if (segment->kind == MI_SEGMENT_NORMAL && segment->thread_id != 0) { + pq = mi_page_queue_for(slice->slice_count,tld); + mi_assert_internal(mi_page_queue_contains(pq,mi_slice_to_page(slice))); + 
} + } + slice = slice + slice->slice_count; } - mi_assert_internal(nfree + segment->used == segment->capacity); - mi_assert_internal(segment->thread_id == _mi_thread_id()); // or 0 - mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || - (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + mi_assert_internal(slice == &segment->slices[segment->slice_count]); + mi_assert_internal(page_count == segment->used + 1); return true; } #endif @@ -145,28 +180,32 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { Segment size calculations ----------------------------------------------------------- */ -// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) +// Start of the page available memory; can be used on uninitialized pages +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); - uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; - - if (page->segment_idx == 0) { - // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; - psize -= segment->segment_info_size; - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { - size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - } - mi_assert_internal((uintptr_t)p % block_size == 0); - } + mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); + ptrdiff_t idx = slice - segment->slices; + size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE; + uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); + /* + if (idx == 0) { + // the first page starts after the segment info (and possible guard page) + p += segment->segment_info_size; + psize -= segment->segment_info_size; + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + // to ensure this, we over-estimate and align with the OS page size + const size_t asize = _mi_os_page_size(); + uint8_t* q = (uint8_t*)_mi_align_up((uintptr_t)p, _mi_os_page_size()); + if (p < q) { + psize -= (q - p); + p = q; + } + mi_assert_internal((uintptr_t)p % _mi_os_page_size() == 0); } + */ + long secure = mi_option_get(mi_option_secure); - if (secure > 1 || (secure == 1 && page->segment_idx == segment->capacity - 1)) { + if (secure > 1 || (secure == 1 && slice == &segment->slices[segment->slice_count - 1])) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page psize -= _mi_os_page_size(); @@ -178,34 +217,23 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { - /* - if (mi_option_is_enabled(mi_option_secure)) { - // always reserve maximally so the protection falls on - // the same address area, as we need to reuse them from the caches interchangably. 
- capacity = MI_SMALL_PAGES_PER_SEGMENT; - } - */ - size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; +static size_t mi_segment_size(size_t required, size_t* pre_size, size_t* info_size) { + size_t page_size = _mi_os_page_size(); + size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; - size_t isize = 0; - - if (!mi_option_is_enabled(mi_option_secure)) { - // normally no guard pages - isize = _mi_align_up(minsize, (16 > MI_MAX_ALIGN_SIZE ? 16 : MI_MAX_ALIGN_SIZE)); - } - else { + + if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data (and one at the end of the segment) - size_t page_size = _mi_os_page_size(); - isize = _mi_align_up(minsize, page_size); - guardsize = page_size; - required = _mi_align_up(required, page_size); + guardsize = page_size; + required = _mi_align_up(required, page_size); } ; if (info_size != NULL) *info_size = isize; - if (pre_size != NULL) *pre_size = isize + guardsize; - return (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_PAGE_HUGE_ALIGN) ); + if (pre_size != NULL) *pre_size = isize + guardsize; + size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_SEGMENT_SLICE_SIZE) ); + mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); + return segment_size; } @@ -229,15 +257,15 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se segment->thread_id = 0; mi_segments_track_size(-((long)segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { - _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + _mi_os_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + _mi_os_free(segment, segment_size, /*segment->memid,*/ tld->stats); } // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -// and no more than 4. -#define MI_SEGMENT_CACHE_MAX (4) +// and no more than 1. 
+#define MI_SEGMENT_CACHE_MAX (1) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -270,14 +298,13 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { } static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_os_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); } segment->next = tld->cache; tld->cache = segment; @@ -297,64 +324,119 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } +/* ----------------------------------------------------------- + Slices +----------------------------------------------------------- */ + + +static uint8_t* mi_slice_start(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + return ((uint8_t*)segment + (mi_slice_index(slice)*MI_SEGMENT_SLICE_SIZE)); +} + +static mi_slice_t* mi_segment_last_slice(mi_segment_t* segment) { + return &segment->slices[segment->slice_count-1]; +} + +/* ----------------------------------------------------------- + Page management +----------------------------------------------------------- */ + + +static void mi_segment_page_init(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_index >= 0 && slice_index < segment->slice_count); + size_t bin = mi_slice_bin(slice_count); + if (slice_count==0) slice_count = 1; + mi_assert_internal(slice_count >= 0 && slice_index + slice_count - 1 < segment->slice_count); + + // set first and last slice (the intermediates can be undetermined) + mi_slice_t* slice = &segment->slices[slice_index]; + slice->slice_count = slice_count; + slice->slice_offset = 0; + if (slice_count > 1) { + mi_slice_t* end = &segment->slices[slice_index + slice_count - 1]; + end->slice_count = 0; + end->slice_offset = (uint16_t)slice_count - 1; + end->block_size = 0; + } + // and push it on the free page queue + mi_page_queue_push( &tld->pages[bin], mi_slice_to_page(slice) ); +} + +static void mi_segment_page_add_free(mi_page_t* page, mi_segments_tld_t* tld) { + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(page->block_size==0 && page->slice_count>0 && page->slice_offset==0); + size_t slice_index = mi_slice_index(mi_page_to_slice(page)); + mi_segment_page_init(segment,slice_index,page->slice_count,tld); +} + + +static void mi_segment_page_split(mi_page_t* page, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(page->slice_count >= slice_count); + mi_assert_internal(page->block_size > 0); // no more in free queue + if (page->slice_count <= slice_count) return; + mi_segment_t* segment = _mi_page_segment(page); + size_t next_index = mi_slice_index(mi_page_to_slice(page)) + slice_count; + size_t next_count = page->slice_count - slice_count; + mi_segment_page_init( segment, next_index, next_count, tld ); + page->slice_count = slice_count; +} + +static mi_page_t* mi_segment_page_find(size_t slice_count, mi_segments_tld_t* tld) { + // search from best fit up + mi_page_queue_t* pq = 
mi_page_queue_for(slice_count,tld); + if (slice_count == 0) slice_count = 1; + while (pq <= &tld->pages[MI_SEGMENT_BIN_MAX] && mi_page_queue_is_empty(pq)) { + pq++; + } + if (pq > &tld->pages[MI_SEGMENT_BIN_MAX]) { + // could not find a page.. + return NULL; + } + + // pop the page and split to the right size + mi_page_t* page = mi_page_queue_pop(pq); + mi_assert_internal(page != NULL && page->slice_count >= slice_count && page->slice_offset == 0); + if (page->slice_count > slice_count) { + mi_segment_page_split(page, slice_count, tld); + } + mi_assert_internal(page != NULL && page->slice_count == slice_count); + return page; +} + +static void mi_segment_page_remove(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0); + mi_page_queue_t* pq = mi_page_queue_for(slice->slice_count, tld); + mi_page_queue_remove(pq, mi_slice_to_page(slice)); +} + + /* ----------------------------------------------------------- Segment allocation ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // calculate needed sizes first - size_t capacity; - if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); - capacity = 1; - } - else { - mi_assert_internal(required == 0); - size_t page_size = (size_t)1 << page_shift; - capacity = MI_SEGMENT_SIZE / page_size; - mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); - mi_assert_internal(capacity >= 1 && capacity <= MI_SMALL_PAGES_PER_SEGMENT); - } size_t info_size; size_t pre_size; - size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); + size_t segment_size = mi_segment_size(required, &pre_size, &info_size); + size_t slice_count = segment_size / MI_SEGMENT_SLICE_SIZE; mi_assert_internal(segment_size >= required); - size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); + //mi_assert_internal(pre_size % MI_SEGMENT_SLICE_SIZE == 0); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); - bool protection_still_good = false; + bool commit = mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_eager_region_commit) + || required > 0; // huge page mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); - if (segment != NULL) { - if (mi_option_is_enabled(mi_option_secure)) { - if (segment->page_kind != page_kind) { - _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs - } - else { - protection_still_good = true; // otherwise, the guard pages are still in place - } - } - if (!mi_option_is_enabled(mi_option_eager_commit)) { - if (page_kind > MI_PAGE_MEDIUM) { - _mi_mem_commit(segment, segment->segment_size, tld->stats); - } - else { - // ok, commit (and unreset) on demand again - } - } - else if (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset)) { - _mi_mem_unreset(segment, segment->segment_size, tld->stats); - } - } - else { + if (segment==NULL) { // Allocate the segment from the OS - size_t memid; - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &memid, os_tld); + size_t memid = 0; + segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, /* &memid,*/ os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { - _mi_mem_commit(segment, info_size, tld->stats); + _mi_os_commit(segment, info_size, tld->stats); } segment->memid = memid; mi_segments_track_size((long)segment_size, tld); @@ -367,65 +449,73 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->memid = memid; } - if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) { + if (mi_option_is_enabled(mi_option_secure)) { // in secure mode, we set up a protected page in between the segment info // and the page data - mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); + mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); + _mi_os_protect((uint8_t*)segment + info_size, (pre_size - info_size)); size_t os_page_size = _mi_os_page_size(); - if (mi_option_get(mi_option_secure) <= 1) { - // and protect the last page too - _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); + // and protect the last page too + _mi_os_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + slice_count--; // don't use the last slice :-( + } + + // initialize segment info + segment->segment_size = segment_size; + segment->segment_info_size = pre_size; + segment->thread_id = _mi_thread_id(); + segment->cookie = _mi_ptr_cookie(segment); + segment->slice_count = slice_count; + segment->all_committed = commit; + segment->kind = (required == 0 ? 
MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); + _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + + // reserve first slices for segment info + size_t islice_count = (segment->segment_info_size + MI_SEGMENT_SLICE_SIZE - 1)/MI_SEGMENT_SLICE_SIZE; + for (size_t i = 0; i < islice_count; i++) { + mi_slice_t* slice = &segment->slices[i]; + if (i==0) { + slice->slice_count = islice_count; + slice->block_size = islice_count * MI_SEGMENT_SLICE_SIZE; } else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); - } + slice->slice_offset = (uint16_t)i; + slice->block_size = 1; } } - segment->page_kind = page_kind; - segment->capacity = capacity; - segment->page_shift = page_shift; - segment->segment_size = segment_size; - segment->segment_info_size = pre_size; - segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); - for (uint8_t i = 0; i < segment->capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; + // initialize initial free pages + if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page + mi_segment_page_init(segment, islice_count, segment->slice_count - islice_count, tld); } - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); - //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); - //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); - mi_assert(segment != NULL); - mi_segment_remove_from_free_queue(segment,tld); + mi_assert_internal(segment != NULL); + mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->prev == NULL); + mi_assert_internal(segment->used == 0); - mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); - mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); - mi_assert(segment->next == NULL); - mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - - // update reset memory statistics - /* - for (uint8_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - page->is_reset = false; - mi_stat_decrease( tld->stats->reset,mi_page_size(page)); + // Remove the free pages + mi_slice_t* slice = &segment->slices[0]; + size_t page_count = 0; + while (slice < mi_segment_last_slice(segment)) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. 
+ if (slice->block_size == 0) { + mi_segment_page_remove(slice, tld); } + page_count++; + slice = slice + slice->slice_count; } - */ + mi_assert_internal(page_count == 2); // first page is allocated by the segment itself + // stats + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -436,100 +526,143 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t } /* ----------------------------------------------------------- - Free page management inside a segment + Page allocation ----------------------------------------------------------- */ +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(required <= MI_LARGE_SIZE_MAX && page_kind <= MI_PAGE_LARGE); -static bool mi_segment_has_free(const mi_segment_t* segment) { - return (segment->used < segment->capacity); -} + // find a free page + size_t page_size = _mi_align_up(required,MI_SEGMENT_SLICE_SIZE); + size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; + mi_page_t* page = mi_segment_page_find(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + if (page==NULL) { + // no free page, allocate a new segment and try again + if (mi_segment_alloc(0, tld, os_tld) == NULL) return NULL; // OOM + return mi_segment_page_alloc(page_kind, required, tld, os_tld); + } + mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_assert_expensive(mi_segment_is_valid(segment)); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - if (page->is_reset || !page->is_committed) { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - mi_assert_internal(!(page->is_reset && !page->is_committed)); - if (!page->is_committed) { - page->is_committed = true; - _mi_mem_commit(start,psize,stats); - } - if (page->is_reset) { - page->is_reset = false; - _mi_mem_unreset(start, psize, stats); - } - } - return page; + // set slice back pointers and commit/unreset + mi_segment_t* segment = _mi_page_segment(page); + mi_slice_t* slice = mi_page_to_slice(page); + bool commit = false; + bool unreset = false; + for (size_t i = 0; i < page->slice_count; i++, slice++) { + slice->slice_offset = (uint16_t)i; + slice->block_size = 1; + if (i > 0) slice->slice_count = 0; + if (!segment->all_committed && !slice->is_committed) { + slice->is_committed = true; + commit = true; + } + if (slice->is_reset) { + slice->is_reset = false; + unreset = true; } } - mi_assert(false); - return NULL; + uint8_t* page_start = mi_slice_start(mi_page_to_slice(page)); + if(commit) { _mi_os_commit(page_start, page_size, tld->stats); } + if(unreset){ _mi_os_unreset(page_start, page_size, tld->stats); } + + // initialize the page and return + mi_assert_internal(segment->thread_id == _mi_thread_id()); + segment->used++; + mi_page_init_flags(page, segment->thread_id); + return page; +} + +static void mi_segment_page_free_coalesce(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page != NULL && page->slice_count > 0 && page->slice_offset == 0 && page->block_size > 0); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(segment->used > 0); + 
segment->used--; + + // free and coalesce the page + mi_slice_t* slice = mi_page_to_slice(page); + size_t slice_count = slice->slice_count; + mi_slice_t* next = slice + slice->slice_count; + mi_assert_internal(next <= mi_segment_last_slice(segment) + 1); + if (next <= mi_segment_last_slice(segment) && next->block_size==0) { + // free next block -- remove it from free and merge + mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); + slice_count += next->slice_count; // extend + mi_segment_page_remove(next, tld); + } + if (slice > segment->slices) { + mi_slice_t* prev = slice - 1; + prev = prev - prev->slice_offset; + mi_assert_internal(prev >= segment->slices); + if (prev->block_size==0) { + // free previous slice -- remove it from free and merge + mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); + slice_count += prev->slice_count; + mi_segment_page_remove(prev, tld); + slice = prev; + } + } + + // and add the new free page + mi_segment_page_init(segment, mi_slice_index(slice), slice_count, tld); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); } /* ----------------------------------------------------------- - Free + Page Free ----------------------------------------------------------- */ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); - mi_assert_internal(page->segment_in_use); +static void mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page->block_size > 0); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(page->is_committed); + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment->all_committed || page->is_committed); size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? 
if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_mem_reset(start, psize, stats); + _mi_os_reset(start, psize, tld->stats); } // zero the page data - uint8_t idx = page->segment_idx; // don't clear the index - bool is_reset = page->is_reset; // don't clear the reset flag + size_t slice_count = page->slice_count; // don't clear the slice_count + bool is_reset = page->is_reset; // don't clear the reset flag bool is_committed = page->is_committed; // don't clear the commit flag memset(page, 0, sizeof(*page)); - page->segment_idx = idx; - page->segment_in_use = false; + page->slice_count = slice_count; page->is_reset = is_reset; page->is_committed = is_committed; - segment->used--; + page->block_size = 1; + + // and free it + mi_segment_page_free_coalesce(page, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); // mark it as free now - mi_segment_page_clear(segment, page, tld->stats); + mi_segment_page_clear(page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment mi_segment_free(segment, force, tld); } - else { - if (segment->used == segment->abandoned) { - // only abandoned pages; remove from free list and abandon - mi_segment_abandon(segment,tld); - } - else if (segment->used + 1 == segment->capacity) { - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // for now we only support small and medium pages - // move back to segments free list - mi_segment_insert_in_free_queue(segment,tld); - } - } + else if (segment->used == segment->abandoned) { + // only abandoned pages; remove from free list and abandon + mi_segment_abandon(segment,tld); + } } @@ -548,10 +681,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); - // remove the segment from the free page queue if needed - mi_segment_remove_from_free_queue(segment,tld); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + // all pages in the segment are abandoned; add it to the abandoned list segment->thread_id = 0; do { @@ -565,7 +696,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); @@ -583,7 +714,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } else { atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) - if (atmost < 8) atmost = 8; // but at least 8 + if (atmost < 2) atmost = 2; // but at least 2 } // for `atmost` `reclaimed` abandoned segments... @@ -597,42 +728,44 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen // got it. 
      mi_atomic_decrement(&abandoned_count);
-      segment->thread_id = _mi_thread_id();
+      mi_assert_expensive(mi_segment_is_valid(segment, tld));
      segment->abandoned_next = NULL;
      mi_segments_track_size((long)segment->segment_size,tld);
      mi_assert_internal(segment->next == NULL && segment->prev == NULL);
-      mi_assert_expensive(mi_segment_is_valid(segment));
      _mi_stat_decrease(&tld->stats->segments_abandoned,1);
-      // add its abandoned pages to the current thread
-      mi_assert(segment->abandoned == segment->used);
-      for (size_t i = 0; i < segment->capacity; i++) {
-        mi_page_t* page = &segment->pages[i];
-        if (page->segment_in_use) {
+      mi_slice_t* slice = &segment->slices[0];
+      while (slice < mi_segment_last_slice(segment)) {
+        mi_assert_internal(slice->slice_count > 0);
+        mi_assert_internal(slice->slice_offset == 0);
+        mi_page_t* page = mi_slice_to_page(slice);
+        slice = slice + slice->slice_count;
+        if (page->block_size > 0) { // a page in use
          segment->abandoned--;
-          mi_assert(page->next == NULL);
+          mi_assert_internal(page->next == NULL && page->prev==NULL);
          _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
          if (mi_page_all_free(page)) {
            // if everything free by now, free the page
-            mi_segment_page_clear(segment,page,tld->stats);
+            mi_segment_page_clear(page, tld);
          }
          else {
            // otherwise reclaim it
-            mi_page_init_flags(page,segment->thread_id);
-            _mi_page_reclaim(heap,page);
+            mi_page_init_flags(page, segment->thread_id);
+            _mi_page_reclaim(heap, page);
          }
        }
+        else { // free range of slices; add to the free pages
+          mi_segment_page_add_free(page,tld);
+        }
      }
+      mi_assert(segment->abandoned == 0);
+      segment->thread_id = _mi_thread_id(); // only now for valid checks
      if (segment->used == 0) { // due to page_clear
        mi_segment_free(segment,false,tld);
      }
      else {
-        reclaimed++;
-        // add its free pages to the the current thread free small segment queue
-        if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) {
-          mi_segment_insert_in_free_queue(segment,tld);
-        }
+        reclaimed++;
      }
    }
  return (reclaimed>0);
@@ -643,64 +776,16 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
    Small page allocation
 ----------------------------------------------------------- */

-// Allocate a small page inside a segment.
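As an aside, the reclaim loop above walks the segment's slice array in strides of `slice_count`: the head slice of each span doubles as the page header, and `block_size == 0` marks a free span. A minimal standalone sketch of that traversal, using hypothetical simplified types (not the PR's actual structs):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct slice_s {
      size_t   slice_count;  // length of the span; only meaningful at its head slice
      uint32_t block_size;   // 0 = free span, > 0 = a page in use
    } slice_t;

    // Visit the head slice of every span, as the reclaim loop above does.
    static void visit_spans(slice_t* slices, size_t entries) {
      slice_t* slice = &slices[0];
      while (slice < &slices[entries]) {
        if (slice->block_size > 0) {
          // a page in use: clear it if all its blocks are free, else reclaim it
        }
        else {
          // a free span: add it back to the free page queues
        }
        slice = slice + slice->slice_count;  // jump to the next span head
      }
    }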
-// Requires that the page has free pages
-static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
-  mi_assert_internal(mi_segment_has_free(segment));
-  mi_page_t* page = mi_segment_find_free(segment, tld->stats);
-  page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
-  segment->used++;
-  mi_assert_internal(segment->used <= segment->capacity);
-  if (segment->used == segment->capacity) {
-    // if no more free pages, remove from the queue
-    mi_assert_internal(!mi_segment_has_free(segment));
-    mi_segment_remove_from_free_queue(segment,tld);
-  }
-  return page;
-}
-
-static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
-  mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld);
-  if (mi_segment_queue_is_empty(free_queue)) {
-    mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld);
-    if (segment == NULL) return NULL;
-    mi_segment_enqueue(free_queue, segment);
-  }
-  mi_assert_internal(free_queue->first != NULL);
-  return mi_segment_page_alloc_in(free_queue->first,tld);
-}
-
-static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
-  return mi_segment_page_alloc(MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld);
-}
-
-static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
-  return mi_segment_page_alloc(MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld);
-}
-
-/* -----------------------------------------------------------
-   large page allocation
------------------------------------------------------------ */
-
-static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
-  mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld);
-  if (segment == NULL) return NULL;
-  segment->used = 1;
-  mi_page_t* page = &segment->pages[0];
-  page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
-  return page;
-}
-
 static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
-  mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld);
+  mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld);
   if (segment == NULL) return NULL;
   mi_assert_internal(segment->segment_size - segment->segment_info_size >= size);
   segment->used = 1;
-  mi_page_t* page = &segment->pages[0];
-  page->segment_in_use = true;
+  mi_page_t* page = mi_slice_to_page(&segment->slices[0]);
+  page->slice_count = segment->slice_count;
+  page->slice_offset = 0;
+  page->block_size = size;
   mi_page_init_flags(page,segment->thread_id);
   return page;
 }
@@ -708,25 +793,144 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld

 /* -----------------------------------------------------------
    Page allocation and free
----------------------------------------------------------- */

+/*
 static bool mi_is_good_fit(size_t bsize, size_t size) {
   // good fit if no more than 25% wasted
   return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4));
 }
+*/

 mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   mi_page_t* page;
-  if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) {
-    page = mi_segment_small_page_alloc(tld,os_tld);
+  if (block_size <= MI_SMALL_SIZE_MAX) {// || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) {
+    page =
+      mi_segment_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld);
   }
-  else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
-    page = mi_segment_medium_page_alloc(tld, os_tld);
+  else if (block_size <= MI_MEDIUM_SIZE_MAX) {// || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
+    page = mi_segment_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld);
   }
-  else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
-    page = mi_segment_large_page_alloc(tld, os_tld);
+  else if (block_size <= MI_LARGE_SIZE_MAX) {
+    page = mi_segment_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld);
   }
   else {
     page = mi_segment_huge_page_alloc(block_size,tld,os_tld);
   }
-  mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page)));
+  mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
   return page;
 }
+
+
+/* -----------------------------------------------------------
+   The following functions reliably find the segment or
+   block that encompasses any pointer p (or NULL if it is not
+   in any of our segments).
+   We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (128MiB),
+   set to 1 if it contains the segment meta data.
+----------------------------------------------------------- */
+
+#if (MI_INTPTR_SIZE==8)
+#define MI_MAX_ADDRESS    ((size_t)1 << 44)  // 16TiB
+#else
+#define MI_MAX_ADDRESS    ((size_t)1 << 31)  // 2GiB
+#endif
+
+#define MI_SEGMENT_MAP_BITS  (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
+#define MI_SEGMENT_MAP_SIZE  (MI_SEGMENT_MAP_BITS / 8)
+#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
+
+static volatile uintptr_t mi_segment_map[MI_SEGMENT_MAP_WSIZE];  // 1KiB per TiB with 128MiB segments
+
+static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
+  mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on 128MiB?
+  uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE;
+  *bitidx = segindex % (8*MI_INTPTR_SIZE);
+  return (segindex / (8*MI_INTPTR_SIZE));
+}
+
+static void mi_segment_map_allocated_at(const mi_segment_t* segment) {
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE);
+  if (index==0) return;
+  uintptr_t mask;
+  uintptr_t newmask;
+  do {
+    mask = mi_segment_map[index];
+    newmask = (mask | ((uintptr_t)1 << bitidx));
+  } while (!mi_atomic_compare_exchange(&mi_segment_map[index], newmask, mask));
+}
+
+static void mi_segment_map_freed_at(const mi_segment_t* segment) {
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE);
+  if (index == 0) return;
+  uintptr_t mask;
+  uintptr_t newmask;
+  do {
+    mask = mi_segment_map[index];
+    newmask = (mask & ~((uintptr_t)1 << bitidx));
+  } while (!mi_atomic_compare_exchange(&mi_segment_map[index], newmask, mask));
+}
+
+// Determine the segment belonging to a pointer, or NULL if it is not in a valid segment.
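Before the lookup function itself, a concrete illustration of the bitmap indexing above: with 64-bit map words, linear segment number 70 lands at bit 6 of word 1. A small standalone sketch with hypothetical names, assuming a 64-bit platform and the 128MiB segment size and 16TiB MI_MAX_ADDRESS defined above:

    #include <stdint.h>
    #include <stdio.h>

    #define SEG_SIZE ((uintptr_t)1 << 27)   // 128MiB segments, matching the comment above
    #define MAX_ADDR ((uintptr_t)1 << 44)   // 16TiB of address space covered by the map

    int main(void) {
      uintptr_t segment  = 70 * SEG_SIZE;                    // some 128MiB-aligned address
      uintptr_t segindex = (segment % MAX_ADDR) / SEG_SIZE;  // linear segment number: 70
      size_t index  = segindex / 64;  // 64 bits per map word: word 1 ...
      size_t bitidx = segindex % 64;  // ... bit 6
      printf("segment %#zx -> map word %zu, bit %zu\n", (size_t)segment, index, bitidx);
      return 0;
    }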
+static mi_segment_t* _mi_segment_of(const void* p) {
+  mi_segment_t* segment = _mi_ptr_segment(p);
+  size_t bitidx;
+  size_t index = mi_segment_map_index_of(segment, &bitidx);
+  // fast path: any pointer into a valid small/medium/large object, or the first 4MiB of a huge one
+  if (mi_likely((mi_segment_map[index] & ((uintptr_t)1 << bitidx)) != 0)) {
+    return segment; // yes, allocated by us
+  }
+  if (index==0) return NULL;
+  // search downwards for the first segment in case it is an interior pointer;
+  // this can be slow, but it searches in 256MiB steps through valid huge objects
+  // note: we could maintain a lowest index to speed up the path for invalid pointers?
+  size_t lobitidx;
+  size_t loindex;
+  uintptr_t lobits = mi_segment_map[index] & (((uintptr_t)1 << bitidx) - 1);
+  if (lobits != 0) {
+    loindex = index;
+    lobitidx = _mi_bsr(lobits);
+  }
+  else {
+    loindex = index - 1;
+    while (loindex > 0 && mi_segment_map[loindex] == 0) loindex--;
+    if (loindex==0) return NULL;
+    lobitidx = _mi_bsr(mi_segment_map[loindex]);
+  }
+  // take the difference, as the addresses could be larger than the MAX_ADDRESS space.
+  size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
+  segment = (mi_segment_t*)((uint8_t*)segment - diff);
+
+  if (segment == NULL) return NULL;
+  mi_assert_internal((void*)segment < p);
+  bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
+  mi_assert_internal(cookie_ok);
+  if (mi_unlikely(!cookie_ok)) return NULL;
+  if (((uint8_t*)segment + segment->segment_size) <= (uint8_t*)p) return NULL; // outside the range
+  mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + segment->segment_size);
+  return segment;
+}
+
+// Is this a valid pointer in our heap?
+static bool mi_is_valid_pointer(const void* p) {
+  return (_mi_segment_of(p) != NULL);
+}
+
+// Return the full segment range belonging to a pointer
+static void* mi_segment_range_of(const void* p, size_t* size) {
+  mi_segment_t* segment = _mi_segment_of(p);
+  if (segment == NULL) {
+    if (size != NULL) *size = 0;
+    return NULL;
+  }
+  else {
+    if (size != NULL) *size = segment->segment_size;
+    return segment;
+  }
+}
+
+bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+  return mi_is_valid_pointer(p);
+}
+
diff --git a/src/stats.c b/src/stats.c
index e7d398b2..a9a022fb 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -106,11 +106,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
   mi_stat_add(&stats->malloc, &src->malloc, 1);
   mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
   mi_stat_add(&stats->huge, &src->huge, 1);
-  mi_stat_add(&stats->giant, &src->giant, 1);
+  mi_stat_add(&stats->large, &src->large, 1);
   mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
   mi_stat_counter_add(&stats->searches, &src->searches, 1);
   mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
-  mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1);
+  mi_stat_counter_add(&stats->large_count, &src->large_count, 1);
 #if MI_STAT>1
   for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
     if (src->normal[i].allocated > 0 || src->normal[i].freed > 0) {
@@ -232,11 +232,11 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
   mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out);
   mi_stat_print(&normal, "normal", 1, out);
   mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ?
     1 : -(stats->huge.allocated / stats->huge_count.count)), out);
-  mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out);
+  mi_stat_print(&stats->large, "large", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out);
   mi_stat_count_t total = { 0,0,0,0 };
   mi_stat_add(&total, &normal, 1);
   mi_stat_add(&total, &stats->huge, 1);
-  mi_stat_add(&total, &stats->giant, 1);
+  mi_stat_add(&total, &stats->large, 1);
   mi_stat_print(&total, "total", 1, out);
   _mi_fprintf(out, "malloc requested:     ");
   mi_print_amount(stats->malloc.allocated, 1, out);
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 6ddf4f37..7f20268a 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -6,8 +6,168 @@
 #include <mimalloc.h>
 #include <mimalloc-override.h> // redefines malloc etc.
+#include <stdint.h>
+#include <stdio.h>
+
+#define MI_INTPTR_SIZE 8
+#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE)
+
+#define MI_BIN_HUGE 100
+//#define MI_ALIGN2W
+
+// Bit scan reverse: return the index of the highest bit.
+static inline uint8_t mi_bsr32(uint32_t x);
+
+#if defined(_MSC_VER)
+#include <windows.h>
+#include <intrin.h>
+static inline uint8_t mi_bsr32(uint32_t x) {
+  uint32_t idx;
+  _BitScanReverse((DWORD*)&idx, x);
+  return idx;
+}
+#elif defined(__GNUC__) || defined(__clang__)
+static inline uint8_t mi_bsr32(uint32_t x) {
+  return (31 - __builtin_clz(x));
+}
+#else
+static inline uint8_t mi_bsr32(uint32_t x) {
+  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
+  static const uint8_t debruijn[32] = {
+    31,  0, 22,  1, 28, 23, 18,  2, 29, 26, 24, 10, 19,  7,  3, 12,
+    30, 21, 27, 17, 25,  9,  6, 11, 20, 16,  8,  5, 15,  4, 14, 13,
+  };
+  x |= x >> 1;
+  x |= x >> 2;
+  x |= x >> 4;
+  x |= x >> 8;
+  x |= x >> 16;
+  x++;
+  return debruijn[(x*0x076be629) >> 27];
+}
+#endif
+
+// Bit scan reverse: return the index of the highest bit.
+uint8_t _mi_bsr(uintptr_t x) {
+  if (x == 0) return 0;
+  #if MI_INTPTR_SIZE==8
+  uint32_t hi = (x >> 32);
+  return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi));
+  #elif MI_INTPTR_SIZE==4
+  return mi_bsr32(x);
+  #else
+  # error "define bsr for non-32 or 64-bit platforms"
+  #endif
+}
+
+static inline size_t _mi_wsize_from_size(size_t size) {
+  return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
+}
+
+// Return the bin for a given field size.
+// Returns MI_BIN_HUGE if the size is too large.
+// We use `wsize` for the size in "machine word sizes",
+// i.e. byte size == `wsize*sizeof(void*)`.
+extern inline uint8_t _mi_bin8(size_t size) {
+  size_t wsize = _mi_wsize_from_size(size);
+  uint8_t bin;
+  if (wsize <= 1) {
+    bin = 1;
+  }
+  #if defined(MI_ALIGN4W)
+  else if (wsize <= 4) {
+    bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
+  }
+  #elif defined(MI_ALIGN2W)
+  else if (wsize <= 8) {
+    bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
+  }
+  #else
+  else if (wsize <= 8) {
+    bin = (uint8_t)wsize;
+  }
+  #endif
+  else if (wsize > MI_LARGE_WSIZE_MAX) {
+    bin = MI_BIN_HUGE;
+  }
+  else {
+    #if defined(MI_ALIGN4W)
+    if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
+    #endif
+    wsize--;
+    // find the highest bit
+    uint8_t b = mi_bsr32((uint32_t)wsize);
+    // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
+    // - adjust with 3 because we do not round the first 8 sizes,
+    //   which each get an exact bin
+    bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
+  }
+  return bin;
+}
+
+extern inline uint8_t _mi_bin4(size_t size) {
+  size_t wsize = _mi_wsize_from_size(size);
+  uint8_t bin;
+  if (wsize <= 1) {
+    bin = 1;
+  }
+  #if defined(MI_ALIGN4W)
+  else if (wsize <= 4) {
+    bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
+  }
+  #elif defined(MI_ALIGN2W)
+  else if (wsize <= 8) {
+    bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
+  }
+  #else
+  else if (wsize <= 8) {
+    bin = (uint8_t)wsize;
+  }
+  #endif
+  else if (wsize > MI_LARGE_WSIZE_MAX) {
+    bin = MI_BIN_HUGE;
+  }
+  else {
+    uint8_t b = mi_bsr32((uint32_t)wsize);
+    bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
+  }
+  return bin;
+}
+
+size_t _mi_binx4(size_t bsize) {
+  if (bsize==0) return 0;
+  uint8_t b = mi_bsr32((uint32_t)bsize);
+  if (b <= 1) return bsize;
+  size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01);
+  return bin;
+}
+
+size_t _mi_binx8(size_t bsize) {
+  if (bsize==0) return 0;
+  uint8_t b = mi_bsr32((uint32_t)bsize);
+  if (b <= 2) return bsize;
+  size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5;
+  return bin;
+}
+
+void mi_bins() {
+  //printf("  QNULL(1), /* 0 */ \\\n  ");
+  size_t last_bin = 1;
+  for (size_t bsize = 0; bsize < 8*1024; bsize++) {
+    size_t size = bsize * 64 * 1024;
+    size_t bin = _mi_binx8(bsize);
+    if (bin != last_bin) {
+      printf("bsize: %6zd, size: %6zd, bin: %6zd\n", bsize, size, bin);
+      //printf("QNULL(%6zd), ", wsize);
+      //if (last_bin%8 == 0) printf("/* %i */ \\\n  ", last_bin);
+      last_bin = bin;
+    }
+  }
+}
+
 int main() {
   mi_version();
+  mi_bins();
   void* p1 = malloc(78);
   void* p2 = malloc(24);
   free(p1);
@@ -25,7 +185,7 @@ int main() {
   //p1 = mi_malloc(32);
   //free(p1);
   //p2 = malloc(32);
-  //mi_free(p2);
+  //mi_free(p2);
   mi_stats_print(NULL);
   return 0;
 }
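As a concrete trace of `_mi_bin8` above: for a 200-byte request on a 64-bit platform, wsize = 25; after `wsize--` it is 24 (binary 11000), so b = mi_bsr32(24) = 4, the next two bits are (24 >> 2) & 0x03 = 2, and bin = (4 << 2) + 2 - 3 = 15. A tiny hypothetical driver (not part of the PR; assumes the definitions above are in scope):

    // print a few size -> bin mappings using _mi_bin8 defined earlier in this file
    static void show_bins(void) {
      size_t sizes[] = { 8, 64, 72, 200, 1024, 4096 };
      for (size_t i = 0; i < sizeof(sizes)/sizeof(sizes[0]); i++) {
        printf("size %5zu -> bin %3d\n", sizes[i], _mi_bin8(sizes[i]));
      }
    }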