From 9ebe941ce0cb4705e584c7c638b7345458c6e79c Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 30 Nov 2024 20:21:32 -0800 Subject: [PATCH] first version that passes the make test --- include/mimalloc/internal.h | 36 +++++++++++++---- include/mimalloc/types.h | 20 +++++----- src/alloc-aligned.c | 40 +++++++++---------- src/arena.c | 78 +++++++++++++++++++++++++------------ src/bitmap.c | 2 +- src/free.c | 13 ++++--- src/page-map.c | 13 ++++--- src/page.c | 26 ++++++++++--- test/test-api.c | 14 +++---- test/test-stress.c | 4 +- 10 files changed, 155 insertions(+), 91 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 39bc23eb..02a62bec 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -440,16 +440,34 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si extern uint8_t* _mi_page_map; -#define MI_PAGE_PTR_INVALID ((mi_page_t*)(1)) +static inline mi_page_t* _mi_ptr_page_ex(const void* p, bool* valid) { + #if 1 + const uintptr_t idx = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT; + const size_t ofs = _mi_page_map[idx]; + if (valid != NULL) *valid = (ofs != 0); + return (mi_page_t*)((idx - ofs + 1) << MI_ARENA_SLICE_SHIFT); + #else + const uintptr_t idx = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT; + const uintptr_t up = idx << MI_ARENA_SLICE_SHIFT; + __builtin_prefetch((void*)up); + const size_t ofs = _mi_page_map[idx]; + if (valid != NULL) *valid = (ofs != 0); + return (mi_page_t*)(up - ((ofs - 1) << MI_ARENA_SLICE_SHIFT)); + #endif +} + +static inline mi_page_t* _mi_checked_ptr_page(const void* p) { + bool valid; + mi_page_t* const page = _mi_ptr_page_ex(p,&valid); + return (valid ? page : NULL); +} static inline mi_page_t* _mi_ptr_page(const void* p) { - const uintptr_t up = ((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT; - // __builtin_prefetch((void*)(up << MI_ARENA_SLICE_SHIFT)); - const ptrdiff_t ofs = _mi_page_map[up]; #if MI_DEBUG - if mi_unlikely(ofs==0) return MI_PAGE_PTR_INVALID; + return _mi_checked_ptr_page(p); + #else + return _mi_ptr_page_ex(p,NULL); #endif - return (mi_page_t*)((up - ofs + 1) << MI_ARENA_SLICE_SHIFT); } @@ -509,12 +527,13 @@ static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) { static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); - mi_atomic_store_release(&page->xheap,(uintptr_t)heap); if (heap != NULL) { + mi_atomic_store_release(&page->xheap, (uintptr_t)heap); page->heap_tag = heap->tag; mi_atomic_store_release(&page->xthread_id, heap->thread_id); } else { + mi_atomic_store_release(&page->xheap, (uintptr_t)mi_page_heap(page)->tld->subproc); mi_atomic_store_release(&page->xthread_id,0); } } @@ -578,11 +597,12 @@ static inline bool mi_page_mostly_used(const mi_page_t* page) { } static inline bool mi_page_is_abandoned(const mi_page_t* page) { + // note: the xheap field of an abandoned heap is set to the subproc (for fast reclaim-on-free) return (mi_page_thread_id(page) == 0); } static inline bool mi_page_is_huge(const mi_page_t* page) { - return (page->block_size > MI_LARGE_MAX_OBJ_SIZE); + return (page->block_size > MI_LARGE_MAX_OBJ_SIZE || (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.alignment > MI_PAGE_MAX_OVERALLOC_ALIGN)); } diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index f82265fb..271c7efb 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -123,15 +123,16 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_BITMAP_CHUNK_BITS_SHIFT 8 // optimized for 256 bits per chunk (avx2) #endif +#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT) #define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) #define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE) -#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT) -#define MI_ARENA_MIN_OBJ_BLOCKS (1) -#define MI_ARENA_MAX_OBJ_BLOCKS (MI_BITMAP_CHUNK_BITS) // for now, cannot cross chunk boundaries +#define MI_ARENA_MIN_OBJ_SLICES (1) +#define MI_ARENA_MAX_OBJ_SLICES (MI_SIZE_BITS) // for now, cannot cross bit field boundaries.. todo: make it at least MI_BITMAP_CHUNK_BITS ? (16 MiB) +// #define MI_ARENA_MAX_OBJ_BLOCKS (MI_BITMAP_CHUNK_BITS) // for now, cannot cross chunk boundaries -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_BLOCKS * MI_ARENA_SLICE_SIZE) -#define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_BLOCKS * MI_ARENA_SLICE_SIZE) +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE) +#define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE) #define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE #define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bitmap) @@ -144,9 +145,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_BIN_COUNT (MI_BIN_FULL+1) -// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in singleton pages -#define MI_BLOCK_ALIGNMENT_MAX (MI_ARENA_SLICE_ALIGN) - // We never allocate more than PTRDIFF_MAX (see also ) #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX @@ -318,8 +316,10 @@ typedef struct mi_page_s { // Object sizes // ------------------------------------------------------ -#define MI_PAGE_ALIGN (64) -#define MI_PAGE_INFO_SIZE (2*MI_PAGE_ALIGN) // should be > sizeof(mi_page_t) +#define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map. +#define MI_PAGE_MIN_BLOCK_ALIGN (32) // minimal block alignment in a page +#define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation +#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+1)*MI_PAGE_MIN_BLOCK_ALIGN) // >= sizeof(mi_page_t) // The max object size are checked to not waste more than 12.5% internally over the page sizes. // (Except for large pages since huge objects are allocated in 4MiB chunks) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 84f49ec6..9673334a 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -16,12 +16,11 @@ terms of the MIT license. 
A copy of the license can be found in the file // ------------------------------------------------------ static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) { - // objects up to `MI_PAGE_ALIGN` are allocated aligned to their size + // objects up to `MI_PAGE_MIN_BLOCK_ALIGN` are always allocated aligned to their size mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0)); if (alignment > size) return false; - if (alignment <= MI_MAX_ALIGN_SIZE) return true; const size_t bsize = mi_good_size(size); - return (bsize <= MI_PAGE_ALIGN && (bsize & (alignment-1)) == 0); + return (bsize <= MI_PAGE_MIN_BLOCK_ALIGN && (bsize & (alignment-1)) == 0); } #if MI_GUARDED @@ -39,9 +38,9 @@ static mi_decl_restrict void* mi_heap_malloc_guarded_aligned(mi_heap_t* heap, si static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero) { const size_t rate = heap->guarded_sample_rate; - heap->guarded_sample_rate = 0; + if (rate != 0) { heap->guarded_sample_rate = 0; } // don't write to constant heap_empty void* p = _mi_heap_malloc_zero(heap, size, zero); - heap->guarded_sample_rate = rate; + if (rate != 0) { heap->guarded_sample_rate = rate; } return p; } #else @@ -58,21 +57,20 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t void* p; size_t oversize; - if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) { - // use OS allocation for very large alignment and allocate inside a huge page (not in an arena) - // This can support alignments >= MI_PAGE_ALIGN by ensuring the object can be aligned at a point in the - // first (and single) page such that the page info is `MI_ARENA_SLICE_SIZE` bytes before it (and can be found in the _mi_page_map). + if mi_unlikely(alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) { + // use OS allocation for large alignments and allocate inside a singleton page (not in an arena) + // This can support alignments >= MI_PAGE_ALIGN by ensuring the object can be aligned + // in the first (and single) page such that the page info is `MI_PAGE_ALIGN` bytes before it (and can be found in the _mi_page_map). if mi_unlikely(offset != 0) { // todo: cannot support offset alignment for very large alignments yet -#if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); -#endif + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation with a large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); + #endif return NULL; } oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); // note: no guarded as alignment > 0 - p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block - // zero afterwards as only the area from the aligned_p may be committed! 
+ p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block if (p == NULL) return NULL; } else { @@ -113,13 +111,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t #endif // now zero the block if needed - if (alignment > MI_BLOCK_ALIGNMENT_MAX) { - // for the tracker, on huge aligned allocations only from the start of the large block is defined - mi_track_mem_undefined(aligned_p, size); - if (zero) { - _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); - } - } + //if (alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) { + // // for the tracker, on huge aligned allocations only from the start of the large block is defined + // mi_track_mem_undefined(aligned_p, size); + // if (zero) { + // _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); + // } + //} if (p != aligned_p) { mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p)); diff --git a/src/arena.c b/src/arena.c index b59f8ad3..a2d3f560 100644 --- a/src/arena.c +++ b/src/arena.c @@ -354,9 +354,9 @@ static mi_decl_noinline void* mi_arena_try_alloc( bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) { - mi_assert(slice_count <= MI_ARENA_MAX_OBJ_BLOCKS); + mi_assert(slice_count <= MI_ARENA_MAX_OBJ_SLICES); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); - + // try to find free slices in the arena's void* p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; @@ -469,33 +469,48 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl return NULL; } -static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_size, mi_arena_id_t req_arena_id, mi_tld_t* tld) +static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_size, size_t block_alignment, + mi_arena_id_t req_arena_id, mi_tld_t* tld) { const bool allow_large = true; const bool commit = true; - const size_t alignment = 1; - + const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); + const size_t page_alignment = MI_ARENA_SLICE_ALIGN; + // try to allocate from free space in arena's mi_memid_t memid = _mi_memid_none(); mi_page_t* page = NULL; - if (_mi_option_get_fast(mi_option_disallow_arena_alloc)==0 && req_arena_id == _mi_arena_id_none()) { - page = (mi_page_t*)mi_arena_try_alloc(slice_count, alignment, commit, allow_large, req_arena_id, &memid, tld); + if (!_mi_option_get_fast(mi_option_disallow_arena_alloc) && // allowed to allocate from arena's? 
+ !os_align && // not large alignment + slice_count <= MI_ARENA_MAX_OBJ_SLICES) // and not too large + { + page = (mi_page_t*)mi_arena_try_alloc(slice_count, page_alignment, commit, allow_large, req_arena_id, &memid, tld); } // otherwise fall back to the OS if (page == NULL) { - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), alignment, 0 /* align offset */, commit, allow_large, req_arena_id, &memid, tld); + if (os_align) { + // note: slice_count already includes the page + mi_assert_internal(slice_count >= mi_slice_count_of_size(block_size) + mi_slice_count_of_size(page_alignment)); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena_id, &memid, tld); + } + else { + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, req_arena_id, &memid, tld); + } } if (page == NULL) return NULL; + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment)); // claimed free slices: initialize the page partly - _mi_memzero_aligned(page, sizeof(*page)); - mi_assert(MI_PAGE_INFO_SIZE >= _mi_align_up(sizeof(*page), MI_PAGE_ALIGN)); - const size_t reserved = (mi_size_of_slices(slice_count) - MI_PAGE_INFO_SIZE) / block_size; + if (!memid.initially_zero) { _mi_memzero_aligned(page, sizeof(*page)); } + mi_assert(MI_PAGE_INFO_SIZE >= _mi_align_up(sizeof(*page), MI_PAGE_MIN_BLOCK_ALIGN)); + const size_t block_start = (os_align ? MI_PAGE_ALIGN : MI_PAGE_INFO_SIZE); + const size_t reserved = (os_align ? 1 : (mi_size_of_slices(slice_count) - block_start) / block_size); mi_assert_internal(reserved > 0 && reserved <= UINT16_MAX); page->reserved = (uint16_t)reserved; - page->page_start = (uint8_t*)page + MI_PAGE_INFO_SIZE; + page->page_start = (uint8_t*)page + block_start; page->block_size = block_size; page->memid = memid; page->free_is_zero = memid.initially_zero; @@ -523,7 +538,7 @@ static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t slice_count, size } // 2. find a free block, potentially allocating a new arena - page = mi_arena_page_alloc_fresh(slice_count, block_size, req_arena_id, tld); + page = mi_arena_page_alloc_fresh(slice_count, block_size, 1, req_arena_id, tld); if (page != NULL) { mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count); _mi_page_init(heap, page); @@ -534,18 +549,27 @@ static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t slice_count, size } -static mi_page_t* mi_singleton_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment) { - MI_UNUSED(heap); MI_UNUSED(block_size); MI_UNUSED(page_alignment); - _mi_error_message(EINVAL, "singleton page is not yet implemented\n"); - return NULL; +static mi_page_t* mi_singleton_page_alloc(mi_heap_t* heap, size_t block_size, size_t block_alignment) { + const mi_arena_id_t req_arena_id = heap->arena_id; + mi_tld_t* const tld = heap->tld; + const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); + const size_t info_size = (os_align ? 
MI_PAGE_ALIGN : MI_PAGE_INFO_SIZE); + const size_t slice_count = mi_slice_count_of_size(info_size + block_size); + + mi_page_t* page = mi_arena_page_alloc_fresh(slice_count, block_size, block_alignment, req_arena_id, tld); + if (page == NULL) return NULL; + + mi_assert(page != NULL); + mi_assert(page->reserved == 1); + return page; } -mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment) { +mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t block_alignment) { mi_page_t* page; - if mi_unlikely(page_alignment > MI_BLOCK_ALIGNMENT_MAX) { - mi_assert_internal(_mi_is_power_of_two(page_alignment)); - page = mi_singleton_page_alloc(heap, block_size, page_alignment); + if mi_unlikely(block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) { + mi_assert_internal(_mi_is_power_of_two(block_alignment)); + page = mi_singleton_page_alloc(heap, block_size, block_alignment); } else if (block_size <= MI_SMALL_MAX_OBJ_SIZE) { page = mi_arena_page_allocN(heap, mi_slice_count_of_size(MI_SMALL_PAGE_SIZE), block_size); @@ -557,7 +581,7 @@ mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_ page = mi_arena_page_allocN(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size); } else { - page = mi_singleton_page_alloc(heap, block_size, page_alignment); + page = mi_singleton_page_alloc(heap, block_size, block_alignment); } // mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc); return page; @@ -598,7 +622,10 @@ void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld) { bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) { if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_is_abandoned(page)); } - if (!mi_page_is_abandoned(page)) return false; // it is not abandoned + // if (!mi_page_is_abandoned(page)) return false; // it is not abandoned (anymore) + + // note: we can access the page even it is in the meantime reclaimed by another thread since + // we only call this when on free (and thus there is still an object alive in the page) mi_memid_t memid = page->memid; if (!_mi_arena_memid_is_suitable(memid, heap->arena_id)) return false; // don't reclaim between exclusive and non-exclusive arena's @@ -623,11 +650,12 @@ bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) { } else { // A page in OS or external memory + if (mi_atomic_load_acquire(&page->xheap) != (uintptr_t)heap->tld->subproc) return false; + // we use the thread_id to atomically grab ownership - // TODO: respect the subproc -- do we need to add this to the page? mi_threadid_t abandoned_thread_id = 0; if (mi_atomic_cas_strong_acq_rel(&page->xthread_id, &abandoned_thread_id, heap->thread_id)) { - // we unabandoned partly + // we got it atomically _mi_page_reclaim(heap, page); mi_assert_internal(!mi_page_is_abandoned(page)); return true; diff --git a/src/bitmap.c b/src/bitmap.c index fe44bb67..dd1afe75 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -263,7 +263,7 @@ restore: // set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. // todo: try neon version static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t* pidx) { -#if 0 && defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) +#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) while (true) { const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256())); // (elem64 == ~0 / 0 ? 
0xFF : 0) diff --git a/src/free.c b/src/free.c index 5dbea4a4..c7d92292 100644 --- a/src/free.c +++ b/src/free.c @@ -115,7 +115,7 @@ static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg) #endif mi_page_t* const page = _mi_ptr_page(p); #if MI_DEBUG - if (page == MI_PAGE_PTR_INVALID) { + if (page == NULL && p != NULL) { _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p); } #endif @@ -126,11 +126,9 @@ static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg) // Fast path written carefully to prevent register spilling on the stack void mi_free(void* p) mi_attr_noexcept { - if (p==NULL) return; mi_page_t* const page = mi_checked_ptr_page(p,"mi_free"); - // if mi_unlikely(page==NULL) return; - - + if mi_unlikely(page==NULL) return; + const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page)); if mi_likely(is_local) { // thread-local free? if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) @@ -235,11 +233,14 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block { // the page is abandoned, try to reclaim it into our heap if (_mi_arena_try_reclaim(mi_heap_get_default(), page)) { // TODO: avoid putting it in the full free queue - mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page)); + mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page)); // mi_assert_internal(mi_heap_get_default()->tld->subproc == page->subproc); mi_free(block); // recursively free as now it will be a local free in our heap return; } + else { + mi_assert_internal(!mi_page_is_singleton(page)); // we should have succeeded on singleton pages + } } // The padding check may access the non-thread-owned page for the key values. 
diff --git a/src/page-map.c b/src/page-map.c index 624f615c..a3e9a649 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -37,7 +37,8 @@ static bool mi_page_map_init(void) { // commit the first part so NULL pointers get resolved without an access violation if (!mi_page_map_all_committed) { _mi_os_commit(_mi_page_map, _mi_os_page_size(), NULL, NULL); - _mi_page_map[0] = -1; // so _mi_ptr_page(NULL) == NULL + _mi_page_map[0] = 1; // so _mi_ptr_page(NULL) == NULL + mi_assert_internal(_mi_ptr_page(NULL)==NULL); } return true; } @@ -60,9 +61,9 @@ static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) { static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* slice_count) { size_t page_size; *page_start = mi_page_area(page, &page_size); - if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE; } // furthest interior pointer - *slice_count = mi_slice_count_of_size(page_size); - return ((uintptr_t)*page_start >> MI_ARENA_SLICE_SHIFT); + if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer + *slice_count = mi_slice_count_of_size(page_size) + (((uint8_t*)*page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks + return ((uintptr_t)page >> MI_ARENA_SLICE_SHIFT); } @@ -79,9 +80,9 @@ void _mi_page_map_register(mi_page_t* page) { mi_page_map_ensure_committed(idx, slice_count); // set the offsets - for (int i = 0; i < (int)slice_count; i++) { + for (size_t i = 0; i < slice_count; i++) { mi_assert_internal(i < 128); - _mi_page_map[idx + i] = (i+1); + _mi_page_map[idx + i] = (uint8_t)(i+1); } } diff --git a/src/page.c b/src/page.c index d91b9123..af55b3b3 100644 --- a/src/page.c +++ b/src/page.c @@ -41,9 +41,10 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { + mi_assert_internal(_mi_ptr_page(page) == page); size_t count = 0; while (head != NULL) { - mi_assert_internal(page == _mi_ptr_page(head)); + mi_assert_internal((uint8_t*)head - (uint8_t*)page > MI_LARGE_PAGE_SIZE || page == _mi_ptr_page(head)); count++; head = mi_block_next(page, head); } @@ -123,7 +124,7 @@ bool _mi_page_is_valid(mi_page_t* page) { { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_MAX_OBJ_SIZE || mi_page_is_in_full(page)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_is_huge(page) || mi_page_is_in_full(page)); mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } @@ -258,7 +259,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); // TODO: push on full queue immediately if it is full? - mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); + mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } @@ -279,6 +280,15 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size } if (mi_page_is_abandoned(page)) { _mi_page_reclaim(heap, page); + if (!mi_page_immediate_available(page)) { + if (mi_page_is_expandable(page)) { + mi_page_extend_free(heap, page); + } + else { + mi_assert(false); // should not happen? 
+ return NULL; + } + } } else if (pq != NULL) { mi_page_queue_push(heap, pq, page); @@ -295,7 +305,7 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0); if (page==NULL) return NULL; mi_assert_internal(pq->block_size==mi_page_block_size(page)); - mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); + mi_assert_internal(pq==mi_heap_page_queue_of(heap, page)); return page; } @@ -713,7 +723,7 @@ void _mi_page_init(mi_heap_t* heap, mi_page_t* page) { -------------------------------------------------------------*/ // search for a best next page to use for at most N pages (often cut short if immediate blocks are available) -#define MI_MAX_CANDIDATE_SEARCH (0) +#define MI_MAX_CANDIDATE_SEARCH (8) // Find a page with free blocks of `page->block_size`. @@ -723,7 +733,9 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p #if MI_STAT size_t count = 0; #endif + #if MI_MAX_CANDIDATE_SEARCH > 1 size_t candidate_count = 0; // we reset this on the first candidate to limit the search + #endif mi_page_t* page_candidate = NULL; // a page with free space mi_page_t* page = pq->first; @@ -793,17 +805,21 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(mi_page_is_expandable(page)); mi_page_extend_free(heap, page); } + mi_assert_internal(mi_page_immediate_available(page)); } if (page == NULL) { _mi_heap_collect_retired(heap, false); // perhaps make a page available page = mi_page_fresh(heap, pq); + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); if (page == NULL && first_try) { // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again page = mi_page_queue_find_free_ex(heap, pq, false); + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); } } else { + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // move the page to the front of the queue mi_page_queue_move_to_front(heap, pq, page); page->retire_expire = 0; diff --git a/test/test-api.c b/test/test-api.c index 15484544..ee7c56bb 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -34,7 +34,7 @@ we therefore test the API over various inputs. 
Please add more tests :-) #include "mimalloc.h" // #include "mimalloc/internal.h" -#include "mimalloc/types.h" // for MI_DEBUG and MI_BLOCK_ALIGNMENT_MAX +#include "mimalloc/types.h" // for MI_DEBUG and MI_PAGE_MAX_OVERALLOC_ALIGN #include "testhelper.h" @@ -169,7 +169,7 @@ int main(void) { /* CHECK_BODY("malloc-aligned6") { bool ok = true; - for (size_t align = 1; align <= MI_BLOCK_ALIGNMENT_MAX && ok; align *= 2) { + for (size_t align = 1; align <= MI_PAGE_MAX_OVERALLOC_ALIGN && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { ps[i] = mi_malloc_aligned(align*13 // size @@ -186,16 +186,16 @@ int main(void) { }; */ CHECK_BODY("malloc-aligned7") { - void* p = mi_malloc_aligned(1024,MI_BLOCK_ALIGNMENT_MAX); + void* p = mi_malloc_aligned(1024,MI_PAGE_MAX_OVERALLOC_ALIGN); mi_free(p); - result = ((uintptr_t)p % MI_BLOCK_ALIGNMENT_MAX) == 0; + result = ((uintptr_t)p % MI_PAGE_MAX_OVERALLOC_ALIGN) == 0; }; CHECK_BODY("malloc-aligned8") { bool ok = true; for (int i = 0; i < 5 && ok; i++) { int n = (1 << i); - void* p = mi_malloc_aligned(1024, n * MI_BLOCK_ALIGNMENT_MAX); - ok = ((uintptr_t)p % (n*MI_BLOCK_ALIGNMENT_MAX)) == 0; + void* p = mi_malloc_aligned(1024, n * MI_PAGE_MAX_OVERALLOC_ALIGN); + ok = ((uintptr_t)p % (n*MI_PAGE_MAX_OVERALLOC_ALIGN)) == 0; mi_free(p); } result = ok; @@ -203,7 +203,7 @@ int main(void) { CHECK_BODY("malloc-aligned9") { // test large alignments bool ok = true; void* p[8]; - size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 }; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_PAGE_MAX_OVERALLOC_ALIGN, MI_PAGE_MAX_OVERALLOC_ALIGN + 1, 2 * MI_PAGE_MAX_OVERALLOC_ALIGN, 8 * MI_PAGE_MAX_OVERALLOC_ALIGN, 0 }; for (int i = 0; i < 28 && ok; i++) { int align = (1 << i); for (int j = 0; j < 8 && ok; j++) { diff --git a/test/test-stress.c b/test/test-stress.c index e2133f7d..76dfe877 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -42,7 +42,7 @@ static int SCALE = 10; static int ITER = 10; #elif 0 static int THREADS = 4; -static int SCALE = 20; +static int SCALE = 100; static int ITER = 20; #else static int THREADS = 32; // more repeatable if THREADS <= #processors @@ -54,7 +54,7 @@ static int ITER = 50; // N full iterations destructing and re-creating a #define STRESS // undefine for leak test -static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100) +static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100) static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? static bool main_participates = false; // main thread participates as a worker too
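
For reference, below is a minimal, self-contained sketch of the offset-encoded page map that the internal.h and page-map.c hunks above implement: each 64 KiB slice index stores the 1-based distance back to the first slice of its page, 0 means "no page here", and an interior pointer is mapped to its page start with a single byte load plus a shift. The `demo_*` names, the map size, and the constants are illustrative assumptions for this sketch only, not mimalloc APIs.

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

#define SLICE_SHIFT  16                 /* 64 KiB slices, mirroring MI_ARENA_SLICE_SHIFT */
#define MAP_ENTRIES  (1u << 12)         /* small demo map (covers slice indices 0..4095) */

static uint8_t page_map[MAP_ENTRIES];   /* plays the role of _mi_page_map */

/* register a page starting at `page` that spans `slice_count` slices:
   each covered slice stores the 1-based distance back to the page start */
static void demo_register(uintptr_t page, size_t slice_count) {
  size_t idx = (size_t)(page >> SLICE_SHIFT);
  for (size_t i = 0; i < slice_count; i++) {
    page_map[idx + i] = (uint8_t)(i + 1);
  }
}

/* resolve an interior pointer back to its page start; 0 means "no page" */
static uintptr_t demo_ptr_page(uintptr_t p) {
  size_t idx = (size_t)(p >> SLICE_SHIFT);
  uint8_t ofs = page_map[idx];
  if (ofs == 0) return 0;               /* the checked path: unregistered slice */
  return (uintptr_t)(idx - ofs + 1) << SLICE_SHIFT;
}

int main(void) {
  const uintptr_t page = (uintptr_t)8 << SLICE_SHIFT;   /* a page starting at slice 8 */
  demo_register(page, 4);                               /* spanning 4 slices (256 KiB) */
  assert(demo_ptr_page(page) == page);                  /* the page start maps to itself */
  assert(demo_ptr_page(page + 3*((uintptr_t)1 << SLICE_SHIFT) + 42) == page); /* interior ptr */
  assert(demo_ptr_page(0) == 0);        /* NULL maps to NULL; the patch additionally sets
                                           map[0] = 1 so the unchecked fast path also
                                           returns NULL without a branch */
  return 0;
}

The same encoding is what bounds MI_ARENA_MAX_OBJ_SLICES and the `i < 128` assertion in _mi_page_map_register: the per-slice offset must fit a uint8_t, so a registered page may only span a limited number of slices, and larger or highly aligned blocks go through the singleton-page path guarded by MI_PAGE_MAX_OVERALLOC_ALIGN instead.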