diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 3f03b460..44f4cafe 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -199,7 +199,9 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); // "alloc.c" -void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` +void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index e2b9ce38..cc807ee9 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -203,8 +203,8 @@ typedef int32_t mi_ssize_t; #error "mimalloc internal: define more bins" #endif -// blocks up to this size are always allocated aligned -#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) +// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`) +#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX) // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments #define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index b548ec92..67b3dcc2 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -15,15 +15,15 @@ terms of the MIT license. A copy of the license can be found in the file // Aligned Allocation // ------------------------------------------------------ -static inline bool mi_is_naturally_aligned( size_t size, size_t alignment ) { +static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) { // objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`). - // note: the size may not be not an actual bin-size but it turns out the test below is still correct for our - // powers of two bin spacing (see test-api.c:test-aligned13). mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0)); - return (size <= (MI_MAX_ALIGN_GUARANTEE - MI_PADDING_SIZE) && alignment <= size && ((size + MI_PADDING_SIZE) & (alignment-1)) == 0); + if (alignment > size) return false; + if (alignment <= MI_MAX_ALIGN_SIZE) return true; + const size_t bsize = mi_good_size(size); + return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0); } - // Fallback primitive aligned allocation -- split out for better codegen static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { @@ -31,10 +31,18 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); // use regular allocation if it is guaranteed to fit the alignment constraints. - if (offset == 0 && mi_is_naturally_aligned(size,alignment)) { + if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); - return p; + const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; + if mi_likely(is_aligned_or_null) { + return p; + } + else { + // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct.. + mi_assert(false); + mi_free(p); + } } void* p; @@ -83,7 +91,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* // now zero the block if needed if (alignment > MI_BLOCK_ALIGNMENT_MAX) { - // for the tracker, on huge aligned allocations only from the start of the large block is defined + // for the tracker, on huge aligned allocations only the memory from the start of the large block is defined mi_track_mem_undefined(aligned_p, size); if (zero) { _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); @@ -106,33 +114,35 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t #endif return NULL; } + + // try first if there happens to be a small block available with just the right alignment + if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) { + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const size_t padsize = size + MI_PADDING_SIZE; + mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); + if mi_likely(page->free != NULL) { + const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; + if mi_likely(is_aligned) + { + #if MI_STAT>1 + mi_heap_stat_increase(heap, malloc, size); + #endif + void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen + mi_assert_internal(p != NULL); + mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); + mi_track_malloc(p,size,zero); + return p; + } + } + } + // fallback if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { // we don't allocate more than MI_MAX_ALLOC_SIZE (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } - const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` - const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size check - - // try first if there happens to be a small block available with just the right alignment - if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { - mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); - const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; - if mi_likely(page->free != NULL && is_aligned) - { - #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc, size); - #endif - void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc - mi_assert_internal(p != NULL); - mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); - mi_track_malloc(p,size,zero); - return p; - } - } - // fallback return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero); } @@ -146,14 +156,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he } mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { - if (alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; - if (size <= MI_SMALL_SIZE_MAX && mi_is_naturally_aligned(size,alignment)) { - // fast path for common alignment and size - return mi_heap_malloc_small(heap, size); - } - else { - return mi_heap_malloc_aligned_at(heap, size, alignment, 0); - } + return mi_heap_malloc_aligned_at(heap, size, alignment, 0); } // ensure a definition is emitted diff --git a/src/alloc.c b/src/alloc.c index 2a2aeb8d..7dccacad 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -28,7 +28,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. // Note: in release mode the (inlined) routine is about 7 instructions with a single test. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept +extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); mi_block_t* const block = page->free; @@ -85,14 +85,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz #endif #if MI_PADDING // && !MI_TRACK_ENABLED - mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); + ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); #if (MI_DEBUG>=2) mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); #endif - mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess - padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); - padding->delta = (uint32_t)(delta); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess + padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); + padding->delta = (uint32_t)(delta); #if MI_PADDING_CHECK if (!mi_page_is_huge(page)) { uint8_t* fill = (uint8_t*)padding - delta; @@ -105,6 +105,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz return block; } +// extra entries for improved efficiency in `alloc-aligned.c`. +extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + return _mi_page_malloc_zero(heap,page,size,false); +} +extern inline void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + return _mi_page_malloc_zero(heap,page,size,true); +} + static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { mi_assert(heap != NULL); #if MI_DEBUG @@ -117,7 +125,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, #endif mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); - void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); + void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); #if MI_STAT>1 diff --git a/src/page-queue.c b/src/page-queue.c index 470d1b64..ceea91ee 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -113,10 +113,10 @@ size_t _mi_bin_size(uint8_t bin) { // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { if (size <= MI_MEDIUM_OBJ_SIZE_MAX) { - return _mi_bin_size(mi_bin(size)); + return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE)); } else { - return _mi_align_up(size,_mi_os_page_size()); + return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size()); } } diff --git a/src/page.c b/src/page.c index 2ea25469..871ed215 100644 --- a/src/page.c +++ b/src/page.c @@ -932,12 +932,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc) if mi_unlikely(zero && page->block_size == 0) { // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. - void* p = _mi_page_malloc(heap, page, size, false); + void* p = _mi_page_malloc(heap, page, size); mi_assert_internal(p != NULL); _mi_memzero_aligned(p, mi_page_usable_block_size(page)); return p; } else { - return _mi_page_malloc(heap, page, size, zero); + return _mi_page_malloc_zero(heap, page, size, zero); } } diff --git a/src/segment.c b/src/segment.c index f8054697..9ac22f15 100644 --- a/src/segment.c +++ b/src/segment.c @@ -312,20 +312,28 @@ static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } -static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) +static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t block_size, size_t* page_size) { - ptrdiff_t idx = slice - segment->slices; - size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; + const ptrdiff_t idx = slice - segment->slices; + const size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; + uint8_t* const pstart = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); // make the start not OS page aligned for smaller blocks to avoid page/cache effects - // note: the offset must always be an xblock_size multiple since we assume small allocations + // note: the offset must always be a block_size multiple since we assume small allocations // are aligned (see `mi_heap_malloc_aligned`). size_t start_offset = 0; - if (xblock_size >= MI_INTPTR_SIZE) { - if (xblock_size <= 64) { start_offset = 3*xblock_size; } - else if (xblock_size <= 512) { start_offset = xblock_size; } + if (block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) { + // for small objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + const size_t adjust = block_size - ((uintptr_t)pstart % block_size); + if (adjust < block_size && psize >= block_size + adjust) { + start_offset += adjust; + } + } + if (block_size >= MI_INTPTR_SIZE) { + if (block_size <= 64) { start_offset += 3*block_size; } + else if (block_size <= 512) { start_offset += block_size; } } if (page_size != NULL) { *page_size = psize - start_offset; } - return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); + return (pstart + start_offset); } // Start of the page available memory; can be used on uninitialized pages diff --git a/test/test-api.c b/test/test-api.c index 34bfa0e6..76101980 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -232,15 +232,20 @@ int main(void) { } CHECK_BODY("mimalloc-aligned13") { bool ok = true; - for( size_t size = 1; size <= MI_SMALL_SIZE_MAX && ok; size++ ) { + for( size_t size = 1; size <= (MI_SMALL_SIZE_MAX * 2) && ok; size++ ) { for(size_t align = 1; align <= size && ok; align *= 2 ) { - void* p = mi_malloc_aligned(size,align); - ok = (p != NULL && ((uintptr_t)p % align) == 0); - mi_free(p); + void* p[10]; + for(int i = 0; i < 10 && ok; i++) { + p[i] = mi_malloc_aligned(size,align);; + ok = (p[i] != NULL && ((uintptr_t)(p[i]) % align) == 0); + } + for(int i = 0; i < 10 && ok; i++) { + mi_free(p[i]); + } /* if (ok && align <= size && ((size + MI_PADDING_SIZE) & (align-1)) == 0) { size_t bsize = mi_good_size(size); - ok = (align <= bsize && ((bsize + MI_PADDING_SIZE) & (align-1)) == 0); + ok = (align <= bsize && (bsize & (align-1)) == 0); } */ }