diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index ad9b3ecf..151cd001 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -314,16 +314,37 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
   return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
 }
 
+
+//-----------------------------------------------------------
+// Page flags
+//-----------------------------------------------------------
 static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
-  return (page->flags.xthread_id << MI_PAGE_FLAGS_BITS);
+  return (page->flags & ~MI_PAGE_FLAGS_MASK);
 }
 
 static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
-  page->flags.value = 0;
-  page->flags.xthread_id = (thread_id >> MI_PAGE_FLAGS_BITS);
-  mi_assert(page->flags.value == thread_id);
+  page->flags = thread_id;
 }
 
+static inline bool mi_page_is_in_full(const mi_page_t* page) {
+  return ((page->flags & 0x01) != 0);
+}
+
+static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
+  if (in_full) page->flags |= 0x01;
+  else page->flags &= ~0x01;
+}
+
+static inline bool mi_page_has_aligned(const mi_page_t* page) {
+  return ((page->flags & 0x02) != 0);
+}
+
+static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
+  if (has_aligned) page->flags |= 0x02;
+  else page->flags &= ~0x02;
+}
+
+
 // -------------------------------------------------------------------
 // Encoding/Decoding the free list next pointers
 // -------------------------------------------------------------------
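For reference, a tiny stand-alone sketch of the invariant these helpers rely on: the thread id is stored directly in page->flags and only the two lowest bits are borrowed for in_full/has_aligned, so masking them off recovers the id. This is illustrative code only (PAGE_FLAGS_MASK stands in for MI_PAGE_FLAGS_MASK defined in mimalloc-types.h below, and thread ids having the low 2 bits clear is an assumed precondition):

#include <assert.h>
#include <stdint.h>

#define PAGE_FLAGS_MASK ((uintptr_t)0x03)   // stand-in for MI_PAGE_FLAGS_MASK

static void flags_packing_sketch(uintptr_t thread_id) {
  assert((thread_id & PAGE_FLAGS_MASK) == 0);       // assumed: tids never use the low 2 bits
  uintptr_t flags = thread_id;                      // mi_page_init_flags
  flags |= 0x01;                                    // mi_page_set_in_full(page, true)
  flags |= 0x02;                                    // mi_page_set_has_aligned(page, true)
  assert((flags & ~PAGE_FLAGS_MASK) == thread_id);  // mi_page_thread_id still recovers the id
  flags &= ~(uintptr_t)0x01;                        // mi_page_set_in_full(page, false)
  assert((flags & 0x02) != 0);                      // has_aligned is unaffected
}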
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 4002c12c..db5b52cc 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -94,16 +94,16 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_MEDIUM_SIZE_MAX  (MI_MEDIUM_PAGE_SIZE/4)   // 128kb on 64-bit
 #define MI_LARGE_SIZE_MAX   (MI_LARGE_PAGE_SIZE/4)    // 1Mb on 64-bit
 #define MI_LARGE_WSIZE_MAX  (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT)
-
+#define MI_HUGE_SIZE_MAX    (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE)   // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
 
 // Minimal alignment necessary. On most platforms 16 bytes are needed
 // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
 #define MI_MAX_ALIGN_SIZE   16   // sizeof(max_align_t)
 
 // Maximum number of size classes. (spaced exponentially in 12.5% increments)
-#define MI_BIN_HUGE  (70U)
+#define MI_BIN_HUGE  (73U)
 
-#if (MI_LARGE_WSIZE_MAX > 393216)
+#if (MI_LARGE_WSIZE_MAX >= 655360)
 #error "define more bins"
 #endif
 
@@ -123,25 +123,12 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;
 
 
-// Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags
+// Use the bottom 2 bits for the `in_full` and `has_aligned` flags
+// and the rest for the threadid (we assume tid's never use those lower 2 bits).
 // This allows a single test in `mi_free` to check for unlikely cases
 // (namely, non-local free, aligned free, or freeing in a full page)
-#define MI_PAGE_FLAGS_BITS (2)
-#define MI_PAGE_FLAGS_TID_BITS (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS)
-typedef union mi_page_flags_u {
-  uintptr_t value;
-  struct {
-    #ifdef MI_BIG_ENDIAN
-    uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
-    #endif
-    uintptr_t in_full : 1;
-    uintptr_t has_aligned : 1;
-    #ifndef MI_BIG_ENDIAN
-    uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
-    #endif
-  };
-} mi_page_flags_t;
-
+#define MI_PAGE_FLAGS_MASK  ((uintptr_t)0x03)
+typedef uintptr_t mi_page_flags_t;
 
 // Thread free list.
 // We use the bottom 2 bits of the pointer for mi_delayed_t flags
@@ -339,10 +326,13 @@ typedef struct mi_stats_s {
   mi_stat_count_t commit_calls;
   mi_stat_count_t threads;
   mi_stat_count_t huge;
+  mi_stat_count_t giant;
   mi_stat_count_t malloc;
   mi_stat_count_t segments_cache;
   mi_stat_counter_t page_no_retire;
   mi_stat_counter_t searches;
+  mi_stat_counter_t huge_count;
+  mi_stat_counter_t giant_count;
 #if MI_STAT>1
   mi_stat_count_t normal[MI_BIN_HUGE+1];
 #endif
@@ -393,12 +383,8 @@ typedef struct mi_segments_tld_s {
 } mi_segments_tld_t;
 
 // OS thread local data
-typedef struct mi_os_tld_s {
-  uintptr_t mmap_next_probable;  // probable next address start allocated by mmap (to guess which path to take on alignment)
-  void* mmap_previous;           // previous address returned by mmap
-  uint8_t* pool;                 // pool of segments to reduce mmap calls on some platforms
-  size_t pool_available;         // bytes available in the pool
-  mi_stats_t* stats;             // points to tld stats
+typedef struct mi_os_tld_s {
+  mi_stats_t* stats;             // points to tld stats
 } mi_os_tld_t;
 
 // Thread local data
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 2f44f317..24f6c440 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -43,7 +43,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* heap, size_t size, size_t
   if (p == NULL) return NULL;
 
   // .. and align within the allocation
-  _mi_ptr_page(p)->flags.has_aligned = true;
+  mi_page_set_has_aligned( _mi_ptr_page(p), true );
   uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment);
   mi_assert_internal(adjust % sizeof(uintptr_t) == 0);
   void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust));
diff --git a/src/alloc.c b/src/alloc.c
index bfb37d19..d0fd28cb 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -174,7 +174,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
     if (mi_unlikely(mi_page_all_free(page))) {
       _mi_page_retire(page);
     }
-    else if (mi_unlikely(page->flags.in_full)) {
+    else if (mi_unlikely(mi_page_is_in_full(page))) {
       _mi_page_unfull(page);
     }
   }
@@ -194,7 +194,7 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
 
 static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) {
-  mi_block_t* block = (page->flags.has_aligned ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
+  mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
   _mi_free_block(page, local, block);
 }
 
@@ -237,7 +237,7 @@ void mi_free(void* p) mi_attr_noexcept
 #endif
 
   uintptr_t tid = _mi_thread_id();
-  if (mi_likely(tid == page->flags.value)) {
+  if (mi_likely(tid == page->flags)) {  // if equal, the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
     mi_block_set_next(page, block, page->local_free);
@@ -273,7 +273,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
   const mi_segment_t* segment = _mi_ptr_segment(p);
   const mi_page_t* page = _mi_segment_page_of(segment,p);
   size_t size = page->block_size;
-  if (mi_unlikely(page->flags.has_aligned)) {
+  if (mi_unlikely(mi_page_has_aligned(page))) {
     ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
     mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
     return (size - adjust);
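Why the single compare in mi_free above suffices: page->flags equals the owning thread id exactly when both the in_full and has_aligned bits are clear, so one test rules out a non-local free, a full page, and aligned blocks at once. A small stand-alone illustration (not mimalloc code; the function names and example id are made up):

#include <assert.h>
#include <stdint.h>

// flags == thread_id | in_full_bit | has_aligned_bit, and thread ids keep the low 2 bits clear
static int takes_fast_path(uintptr_t tid, uintptr_t flags) {
  return (tid == flags);
}

static void fast_path_demo(void) {
  uintptr_t tid = (uintptr_t)0x12345670 & ~(uintptr_t)0x03;  // some thread id, low bits clear
  assert( takes_fast_path(tid, tid));         // local free, page not full, no aligned blocks
  assert(!takes_fast_path(tid, tid | 0x01));  // page is in the full queue   -> generic path
  assert(!takes_fast_path(tid, tid | 0x02));  // page has aligned blocks     -> generic path
  assert(!takes_fast_path(tid + 4, tid));     // freeing from another thread -> generic path
}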
diff --git a/src/heap.c b/src/heap.c
index 63954b3b..c18902b1 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -246,7 +246,12 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
 
   // stats
   if (page->block_size > MI_LARGE_SIZE_MAX) {
-    mi_heap_stat_decrease(heap,huge,page->block_size);
+    if (page->block_size > MI_HUGE_SIZE_MAX) {
+      _mi_stat_decrease(&heap->tld->stats.giant,page->block_size);
+    }
+    else {
+      _mi_stat_decrease(&heap->tld->stats.huge, page->block_size);
+    }
   }
 #if (MI_STAT>1)
   size_t inuse = page->used - page->thread_freed;
diff --git a/src/init.c b/src/init.c
index 7215e4fb..b0c0fc5f 100644
--- a/src/init.c
+++ b/src/init.c
@@ -42,8 +42,8 @@ const mi_page_t _mi_page_empty = {
   QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
   QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
   QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
-  QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \
-  QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \
+  QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \
+  QNULL(MI_LARGE_WSIZE_MAX + 1 /* 655360, Huge queue */), \
   QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
 
 #define MI_STAT_COUNT_NULL()  {0,0,0,0}
@@ -63,9 +63,8 @@ const mi_page_t _mi_page_empty = {
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
-  MI_STAT_COUNT_NULL(), \
-  { 0, 0 }, \
-  { 0, 0 } \
+  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
+  { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
   MI_STAT_COUNT_END_NULL()
 
 // --------------------------------------------------------
@@ -98,8 +97,8 @@ static mi_tld_t tld_main = {
   0,
   &_mi_heap_main,
   { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
-  { 0, NULL, NULL, 0, tld_main_stats },                                     // os
-  { MI_STATS_NULL }                                                         // stats
+  { tld_main_stats },                                                       // os
+  { MI_STATS_NULL }                                                         // stats
 };
 
 mi_heap_t _mi_heap_main = {
diff --git a/src/memory.c b/src/memory.c
index 7f8cfb14..ccd810b3 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -126,7 +126,8 @@ Commit from a region
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld) {
+static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+{
   size_t mask = mi_region_block_mask(blocks,bitidx);
   mi_assert_internal(mask != 0);
   mi_assert_internal((mask & mi_atomic_read(&region->map)) == mask);
@@ -139,7 +140,13 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
       start = ALLOCATING;  // try to start allocating
     }
     else if (start == ALLOCATING) {
-      mi_atomic_yield(); // another thead is already allocating.. wait it out
+      // another thread is already allocating.. wait it out
+      // note: the wait here is not great (but should not happen often). Another
+      // strategy might be to just allocate another region in parallel. This tends
+      // to be bad for benchmarks though as these often start many threads at the
+      // same time leading to the allocation of too many regions. (Still, this might
+      // be the most performant and it's ok on 64-bit virtual memory with over-commit.)
+      mi_atomic_yield();
       continue;
     }
   } while( start == ALLOCATING && !mi_atomic_compare_exchange_ptr(&region->start, ALLOCATING, NULL) );
@@ -183,47 +190,35 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld) {
+static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+{
   mi_assert_internal(p != NULL && id != NULL);
   mi_assert_internal(blocks < MI_REGION_MAP_BITS);
 
   const uintptr_t mask = mi_region_block_mask(blocks,0);
   const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
 
-  size_t bitidx ;
-  uintptr_t map;
-  uintptr_t newmap;
-  do {  // while no atomic claim success and not all bits seen
-    // find the first free range of bits
-    map = mi_atomic_read(&region->map);
-    size_t m = map;
-    bitidx = 0;
-    do {
-      // skip ones
-      while ((m&1) != 0) { bitidx++; m>>=1; }
-      // count zeros
-      mi_assert_internal((m&1)==0);
-      size_t zeros = 1;
-      m >>= 1;
-      while(zeros < blocks && (m&1)==0) { zeros++; m>>=1; }
-      if (zeros == blocks) break; // found a range that fits
-      bitidx += zeros;
-    }
-    while(bitidx <= bitidx_max);
-    if (bitidx > bitidx_max) {
-      return true;  // no error, but could not find a range either
-    }
-    // try to claim it
-    mi_assert_internal( (mask << bitidx) >> bitidx == mask ); // no overflow?
-    mi_assert_internal( (map & (mask << bitidx)) == 0);       // fits in zero range
-    newmap = map | (mask << bitidx);
-    mi_assert_internal((newmap^map) >> bitidx == mask);
+  // scan linearly for a free range of zero bits
+  uintptr_t map = mi_atomic_read(&region->map);
+  uintptr_t m = mask;  // the mask shifted by bitidx
+  for(size_t bitidx = 0; bitidx <= bitidx_max; bitidx++, m <<= 1) {
+    if ((map & m) == 0) {  // are the mask bits free at bitidx?
+      mi_assert_internal((m >> bitidx) == mask);  // no overflow?
+      uintptr_t newmap = map | m;
+      mi_assert_internal((newmap^map) >> bitidx == mask);
+      if (!mi_atomic_compare_exchange(&region->map, newmap, map)) {
+        // no success, another thread claimed concurrently.. keep going
+        map = mi_atomic_read(&region->map);
+      }
+      else {
+        // success, we claimed the bits
+        // now commit the block memory -- this can still fail
+        return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, p, id, tld);
+      }
+    }
   }
-  while(!mi_atomic_compare_exchange(&region->map, newmap, map));
-
-  // success, we claimed the blocks atomically
-  // now commit the block memory -- this can still fail
-  return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, p, id, tld);
+  // no error, but also no bits found
+  return true;
 }
 
 // Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
@@ -274,13 +269,14 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
   size_t count = mi_atomic_read(&regions_count);
   size_t idx = mi_atomic_read(&region_next_idx);
   for (size_t visited = 0; visited < count; visited++, idx++) {
-    if (!mi_region_try_alloc_blocks(idx%count, blocks, size, commit, &p, id, tld)) return NULL; // error
+    if (idx >= count) idx = 0;  // wrap around
+    if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error
    if (p != NULL) break;
   }
  if (p == NULL) {
-    // no free range in existing regions -- try to extend beyond the count
-    for (idx = count; idx < MI_REGION_MAX; idx++) {
+    // no free range in existing regions -- try to extend beyond the count.. but at most 4 regions
+    for (idx = count; idx < count + 4 && idx < MI_REGION_MAX; idx++) {
       if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error
       if (p != NULL) break;
     }
   }
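The rewritten mi_region_alloc_blocks above is now a simple first-fit scan over the region bitmap followed by a compare-and-swap to claim the range. A self-contained sketch of the same idea, with the atomics replaced by plain operations (illustrative names, not the real mi_region_* API):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define MAP_BITS  (8*sizeof(uintptr_t))   // stand-in for MI_REGION_MAP_BITS

// Mask of `blocks` consecutive one-bits starting at `bitidx` (like mi_region_block_mask).
static uintptr_t block_mask(size_t blocks, size_t bitidx) {
  return ((((uintptr_t)1 << blocks) - 1) << bitidx);   // requires 0 < blocks < MAP_BITS
}

// First-fit: slide the mask over the map and claim the first free range.
// The real code claims with mi_atomic_compare_exchange and re-reads the map on failure.
static bool claim_blocks(uintptr_t* map, size_t blocks, size_t* bitidx_out) {
  const uintptr_t mask = block_mask(blocks, 0);
  const size_t bitidx_max = MAP_BITS - blocks;
  uintptr_t m = mask;                                  // the mask shifted by bitidx
  for (size_t bitidx = 0; bitidx <= bitidx_max; bitidx++, m <<= 1) {
    if ((*map & m) == 0) {   // all `blocks` bits free at this index?
      *map |= m;             // claim them
      *bitidx_out = bitidx;
      return true;
    }
  }
  return false;              // no free range in this region (not an error)
}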
diff --git a/src/os.c b/src/os.c
index b7499796..72cd1040 100644
--- a/src/os.c
+++ b/src/os.c
@@ -225,27 +225,29 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
 #elif defined(__wasi__)
 static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
-  uintptr_t base = __builtin_wasm_memory_size(0) * os_page_size;
+  uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size();
   uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment);
-  size_t alloc_size = aligned_base - base + size;
-  mi_assert(alloc_size >= size);
+  size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size());
+  mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0);
   if (alloc_size < size) return NULL;
-  if (__builtin_wasm_memory_grow(0, alloc_size / os_page_size) == SIZE_MAX) {
+  if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) {
     errno = ENOMEM;
     return NULL;
   }
-  return (void*) aligned_base;
+  return (void*)aligned_base;
 }
 
 #else
 static void* mi_unix_mmapx(size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
   void* p = NULL;
   #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
-  // on 64-bit systems, use a special area for 4MiB aligned allocations
+  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
   static volatile intptr_t aligned_base = ((intptr_t)1 << 42); // starting at 4TiB
-  if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0 && (aligned_base%try_alignment)==0) {
+  if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0) {
     intptr_t hint = mi_atomic_add(&aligned_base,size) - size;
-    p = mmap((void*)hint,size,protect_flags,flags,fd,0);
-    if (p==MAP_FAILED) p = NULL; // fall back to regular mmap
+    if (hint%try_alignment == 0) {
+      p = mmap((void*)hint,size,protect_flags,flags,fd,0);
+      if (p==MAP_FAILED) p = NULL; // fall back to regular mmap
+    }
   }
   #endif
   if (p==NULL) {
@@ -273,10 +275,10 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags)
   protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
   #endif
   #if defined(VM_MAKE_TAG)
-  // darwin: tracking anonymous page with a specific ID all up to 98 are taken officially but LLVM sanitizers had taken 99
+  // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
   fd = VM_MAKE_TAG(100);
   #endif
-  if (large_os_page_size > 0 && use_large_os_page(size, try_alignment)) {
+  if (use_large_os_page(size, try_alignment)) {
     int lflags = flags;
     int lfd = fd;
   #ifdef MAP_ALIGNED_SUPER
@@ -308,7 +310,7 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags)
 #endif
 
 // Primitive allocation from the OS.
-// Note: the `alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
+// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
 static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, mi_stats_t* stats) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   if (size == 0) return NULL;
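One more note on the mi_unix_mmapx change above: the bumped address is only passed to mmap as a hint when it still satisfies the requested alignment, otherwise the code falls through to a regular unhinted mmap. A rough sketch of that hint arithmetic (no real mmap call; the atomic add is simplified and the 4MiB segment size is an assumption, not something this hunk states):

#include <stdint.h>

#define SEGMENT_SIZE ((intptr_t)4*1024*1024)   // assumed segment size

// Stand-in for `mi_atomic_add(&aligned_base, size) - size`: bump and return the old value.
static intptr_t next_hint(intptr_t* aligned_base, intptr_t size) {
  intptr_t hint = *aligned_base;
  *aligned_base += size;
  return hint;
}

// Use the hint only while it stays aligned; starting at 1<<42 (4TiB) and bumping by
// multiples of the segment size keeps every hint segment-aligned.
static int hint_is_usable(intptr_t hint, intptr_t try_alignment) {
  return (try_alignment > 0 && hint % try_alignment == 0);
}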
diff --git a/src/page-queue.c b/src/page-queue.c
index a386f8a1..e59620c2 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -177,7 +177,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
 #endif
 
 static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
-  uint8_t bin = (page->flags.in_full ? MI_BIN_FULL : _mi_bin(page->block_size));
+  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size));
   mi_heap_t* heap = page->heap;
   mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
   mi_page_queue_t* pq = &heap->pages[bin];
@@ -187,10 +187,10 @@ static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
 }
 
 static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
-  uint8_t bin = (page->flags.in_full ? MI_BIN_FULL : _mi_bin(page->block_size));
+  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size));
   mi_assert_internal(bin <= MI_BIN_FULL);
   mi_page_queue_t* pq = &heap->pages[bin];
-  mi_assert_internal(page->flags.in_full || page->block_size == pq->block_size);
+  mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size);
   return pq;
 }
 
@@ -245,7 +245,7 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
 static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_expensive(mi_page_queue_contains(queue, page));
-  mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (page->flags.in_full && mi_page_queue_is_full(queue)));
+  mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
   if (page->prev != NULL) page->prev->next = page->next;
   if (page->next != NULL) page->next->prev = page->prev;
   if (page == queue->last) queue->last = page->prev;
@@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   page->next = NULL;
   page->prev = NULL;
   page->heap = NULL;
-  page->flags.in_full = false;
+  mi_page_set_in_full(page,false);
 }
 
 
@@ -269,9 +269,9 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
   mi_assert_internal(!mi_page_queue_contains(queue, page));
   mi_assert_internal(page->block_size == queue->block_size ||
                      (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) ||
-                     (page->flags.in_full && mi_page_queue_is_full(queue)));
+                     (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
 
-  page->flags.in_full = mi_page_queue_is_full(queue);
+  mi_page_set_in_full(page, mi_page_queue_is_full(queue));
   page->heap = heap;
   page->next = queue->first;
   page->prev = NULL;
@@ -324,7 +324,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
     mi_heap_queue_first_update(page->heap, to);
   }
 
-  page->flags.in_full = mi_page_queue_is_full(to);
+  mi_page_set_in_full(page, mi_page_queue_is_full(to));
 }
 
 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) {
diff --git a/src/page.c b/src/page.c
index c644a5b3..7ac7535e 100644
--- a/src/page.c
+++ b/src/page.c
@@ -102,7 +102,7 @@ bool _mi_page_is_valid(mi_page_t* page) {
     mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id);
     mi_page_queue_t* pq = mi_page_queue_of(page);
     mi_assert_internal(mi_page_queue_contains(pq, page));
-    mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || page->flags.in_full);
+    mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || mi_page_is_in_full(page));
     mi_assert_internal(mi_heap_contains_queue(page->heap,pq));
   }
   return true;
@@ -282,26 +282,26 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
 void _mi_page_unfull(mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_expensive(_mi_page_is_valid(page));
-  mi_assert_internal(page->flags.in_full);
+  mi_assert_internal(mi_page_is_in_full(page));
 
   _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE);
-  if (!page->flags.in_full) return;
+  if (!mi_page_is_in_full(page)) return;
 
   mi_heap_t* heap = page->heap;
   mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL];
-  page->flags.in_full = false; // to get the right queue
+  mi_page_set_in_full(page, false); // to get the right queue
   mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
-  page->flags.in_full = true;
+  mi_page_set_in_full(page, true);
   mi_page_queue_enqueue_from(pq, pqfull, page);
 }
 
 static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
   mi_assert_internal(pq == mi_page_queue_of(page));
   mi_assert_internal(!mi_page_immediate_available(page));
-  mi_assert_internal(!page->flags.in_full);
+  mi_assert_internal(!mi_page_is_in_full(page));
 
   _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE);
-  if (page->flags.in_full) return;
+  if (mi_page_is_in_full(page)) return;
 
   mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page);
   mi_page_thread_free_collect(page);  // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
@@ -349,11 +349,16 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
   mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING);
   #endif
 
-  page->flags.has_aligned = false;
+  mi_page_set_has_aligned(page, false);
 
   // account for huge pages here
   if (page->block_size > MI_LARGE_SIZE_MAX) {
-    _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
+    if (page->block_size > MI_HUGE_SIZE_MAX) {
+      _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size);
+    }
+    else {
+      _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
+    }
   }
 
   // remove from the page list
@@ -377,7 +382,7 @@ void _mi_page_retire(mi_page_t* page) {
   mi_assert_expensive(_mi_page_is_valid(page));
   mi_assert_internal(mi_page_all_free(page));
 
-  page->flags.has_aligned = false;
+  mi_page_set_has_aligned(page, false);
 
   // don't retire too often..
   // (or we end up retiring and re-allocating most of the time)
@@ -560,7 +565,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page->thread_freed == 0);
   mi_assert_internal(page->next == NULL);
   mi_assert_internal(page->prev == NULL);
-  mi_assert_internal(page->flags.has_aligned == false);
+  mi_assert_internal(!mi_page_has_aligned(page));
   #if MI_SECURE
   mi_assert_internal(page->cookie != 0);
   #endif
@@ -619,7 +624,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
 
     // 3. If the page is completely full, move it to the `mi_pages_full`
     // queue so we don't visit long-lived pages too often.
-    mi_assert_internal(!page->flags.in_full && !mi_page_immediate_available(page));
+    mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
     mi_page_to_full(page,pq);
 
     page = next;
@@ -702,7 +707,14 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
   if (page != NULL) {
     mi_assert_internal(mi_page_immediate_available(page));
     mi_assert_internal(page->block_size == block_size);
-    _mi_stat_increase( &heap->tld->stats.huge, block_size);
+    if (page->block_size > MI_HUGE_SIZE_MAX) {
+      _mi_stat_increase(&heap->tld->stats.giant, block_size);
+      _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);
+    }
+    else {
+      _mi_stat_increase(&heap->tld->stats.huge, block_size);
+      _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1);
+    }
   }
   return page;
 }
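The huge/giant split used above (and earlier in src/heap.c) turns on MI_HUGE_SIZE_MAX = 2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE; with 8-byte pointers and a 4MiB segment that works out to 64MiB (the 4MiB segment size is an assumption here, it is not stated in this patch). A tiny sketch of the bucket choice for blocks that are already bigger than MI_LARGE_SIZE_MAX:

#include <stddef.h>

// Illustrative stand-ins; the real values come from mimalloc-types.h.
#define INTPTR_SIZE   8                                     // MI_INTPTR_SIZE on 64-bit
#define SEGMENT_SIZE  ((size_t)4*1024*1024)                 // assumed 4MiB segments
#define HUGE_SIZE_MAX ((size_t)2*INTPTR_SIZE*SEGMENT_SIZE)  // 2*8*4MiB = 64MiB

// Mirrors the stat choice in _mi_page_free/mi_huge_page_alloc for a large block.
static const char* big_stat_bucket(size_t block_size) {
  return (block_size > HUGE_SIZE_MAX) ? "giant" : "huge";
}
// e.g. big_stat_bucket((size_t)8*1024*1024)   == "huge"   (8MiB  <= 64MiB)
//      big_stat_bucket((size_t)256*1024*1024) == "giant"  (256MiB > 64MiB)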
diff --git a/src/stats.c b/src/stats.c
index 8725e48c..e7d398b2 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -106,8 +106,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
   mi_stat_add(&stats->malloc, &src->malloc, 1);
   mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
   mi_stat_add(&stats->huge, &src->huge, 1);
+  mi_stat_add(&stats->giant, &src->giant, 1);
   mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
   mi_stat_counter_add(&stats->searches, &src->searches, 1);
+  mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
+  mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1);
 #if MI_STAT>1
   for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
     if (src->normal[i].allocated > 0 || src->normal[i].freed > 0) {
@@ -152,20 +155,29 @@ static void mi_print_count(int64_t n, int64_t unit, FILE* out) {
 }
 
 static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, FILE* out ) {
-  _mi_fprintf(out,"%10s:", msg);
-  mi_print_amount(stat->peak, unit, out);
-  if (unit!=0) {
+  _mi_fprintf(out,"%10s:", msg);
+  if (unit>0) {
+    mi_print_amount(stat->peak, unit, out);
     mi_print_amount(stat->allocated, unit, out);
     mi_print_amount(stat->freed, unit, out);
-  }
-  if (unit>0) {
-    mi_print_amount(unit, (unit==0 ? 0 : 1), out);
+    mi_print_amount(unit, 1, out);
     mi_print_count(stat->allocated, unit, out);
     if (stat->allocated > stat->freed)
       _mi_fprintf(out, "  not all freed!\n");
     else
      _mi_fprintf(out, "  ok\n");
   }
+  else if (unit<0) {
+    mi_print_amount(stat->peak, 1, out);
+    mi_print_amount(stat->allocated, 1, out);
+    mi_print_amount(stat->freed, 1, out);
+    mi_print_amount(-unit, 1, out);
+    mi_print_count((stat->allocated / -unit), 0, out);
+    if (stat->allocated > stat->freed)
+      _mi_fprintf(out, "  not all freed!\n");
+    else
+      _mi_fprintf(out, "  ok\n");
+  }
   else {
     _mi_fprintf(out, "\n");
   }
@@ -219,10 +231,12 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
   mi_stat_count_t normal = { 0,0,0,0 };
   mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out);
   mi_stat_print(&normal, "normal", 1, out);
-  mi_stat_print(&stats->huge, "huge", 1, out);
+  mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out);
+  mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out);
   mi_stat_count_t total = { 0,0,0,0 };
   mi_stat_add(&total, &normal, 1);
   mi_stat_add(&total, &stats->huge, 1);
+  mi_stat_add(&total, &stats->giant, 1);
   mi_stat_print(&total, "total", 1, out);
   _mi_fprintf(out, "malloc requested:     ");
   mi_print_amount(stats->malloc.allocated, 1, out);
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 94891cc3..6ddf4f37 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -6,7 +6,6 @@
 #include <mimalloc.h>
 #include <mimalloc-override.h>  // redefines malloc etc.
 
-
 int main() {
   mi_version();
   void* p1 = malloc(78);
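A closing note on the mi_stat_print change in src/stats.c: a negative unit now means "-unit is the average block size", which is how the huge and giant rows get a per-allocation average out of the byte totals and the new huge_count/giant_count counters. A simplified model of that calling convention (the output format here is invented; only the sign convention mirrors the patch):

#include <stdint.h>
#include <stdio.h>

// unit > 0 : fixed unit size; unit < 0 : -unit is the average block size; unit == 0 : no amounts.
static void print_stat(const char* msg, int64_t allocated, int64_t unit) {
  if (unit > 0) {
    printf("%10s: %lld bytes in %lld blocks of %lld\n",
           msg, (long long)allocated, (long long)(allocated/unit), (long long)unit);
  }
  else if (unit < 0) {
    printf("%10s: %lld bytes in %lld allocations, average size %lld\n",
           msg, (long long)allocated, (long long)(allocated / -unit), (long long)-unit);
  }
  else {
    printf("%10s:\n", msg);
  }
}

int main(void) {
  int64_t huge_allocated = (int64_t)192*1024*1024;  // e.g. three 64MiB huge allocations
  int64_t huge_count     = 3;
  // mirrors: mi_stat_print(&stats->huge, "huge", huge_count==0 ? 1 : -(allocated/count), out)
  print_stat("huge", huge_allocated, (huge_count == 0 ? 1 : -(huge_allocated/huge_count)));
  return 0;
}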