diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 192e14da..57f4a76e 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -449,6 +449,9 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {
 }
 
 // Segment that contains the pointer
+// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
+// and we need to align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
+// therefore we align one byte before `p`.
 static inline mi_segment_t* _mi_ptr_segment(const void* p) {
   mi_assert_internal(p != NULL);
   return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 0cef11da..de356c76 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -147,7 +147,7 @@ typedef int32_t mi_ssize_t;
 // Derived constants
 #define MI_SEGMENT_SIZE       (MI_ZU(1)<<MI_SEGMENT_SHIFT)
 #if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360)
 #error "mimalloc internal: define more bins"
 #endif
-#if (MI_ALIGNMENT_MAX > MI_SEGMENT_SIZE/2)
-#error "mimalloc internal: the max aligned boundary is too large for the segment size"
-#endif
-#if (MI_ALIGNED_MAX % MI_SEGMENT_SLICE_SIZE != 0)
-#error "mimalloc internal: the max aligned boundary must be an integral multiple of the segment slice size"
-#endif
 
 // Maximum slice offset (15)
 #define MI_MAX_SLICE_OFFSET   ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
@@ -182,7 +176,8 @@ typedef int32_t mi_ssize_t;
 // blocks up to this size are always allocated aligned
 #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
-
+// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
+#define MI_ALIGNMENT_MAX      (MI_SEGMENT_SIZE >> 1)
 
 // ------------------------------------------------------
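To see why `_mi_ptr_segment` subtracts one before masking, here is a small standalone sketch (not mimalloc code; `SEG_SIZE` and `SEG_MASK` are stand-ins for `MI_SEGMENT_SIZE` and `MI_SEGMENT_MASK`): a huge block aligned exactly at the next `N*MI_SEGMENT_SIZE` boundary must still map back to the segment info that sits `MI_SEGMENT_SIZE` bytes before it, which plain align-down of `p` would miss.

```c
// Standalone sketch of the "align one byte down" trick; the exact segment size is irrelevant.
#include <assert.h>
#include <stdint.h>

#define SEG_SIZE ((uintptr_t)1 << 25)   // stand-in for MI_SEGMENT_SIZE
#define SEG_MASK (SEG_SIZE - 1)         // stand-in for MI_SEGMENT_MASK

static uintptr_t ptr_segment(uintptr_t p) {
  return (p - 1) & ~SEG_MASK;           // align the byte *before* p down to the segment start
}

int main(void) {
  uintptr_t segment = 4 * SEG_SIZE;     // segment info lives at the segment start
  // A block inside a normal segment maps back to the segment start:
  assert(ptr_segment(segment + 4096) == segment);
  // A huge block aligned at the next N*SEG_SIZE boundary (its segment info is exactly
  // SEG_SIZE before it) also maps back to the same segment start:
  uintptr_t block = segment + SEG_SIZE;
  assert(ptr_segment(block) == segment);
  // ...whereas aligning `p` itself down would point past the segment info:
  assert((block & ~SEG_MASK) != segment);
  return 0;
}
```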
diff --git a/include/mimalloc.h b/include/mimalloc.h
index 2a875522..4f8fb9d0 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -168,13 +168,6 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s
 // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`.
 // -------------------------------------------------------------------------------------
 
-// The MI_ALIGNMENT_MAX is deprecated; any alignment is supported but alignments up to MI_ALIGNMENT_MAX may be cheaper.
-#if (INTPTR_MAX > INT32_MAX)
-#define MI_ALIGNMENT_MAX   (32*1024*1024UL)
-#else
-#define MI_ALIGNMENT_MAX   (2*1024*1024UL)
-#endif
-
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 66a26b49..910c8d5d 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -34,11 +34,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
   size_t oversize;
   if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
     // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
+    // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
+    // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down).
     if mi_unlikely(offset != 0) {
       // todo: cannot support offset alignment for very large alignments yet
-#if MI_DEBUG > 0
+      #if MI_DEBUG > 0
       _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
-#endif
+      #endif
       return NULL;
     }
     oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
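The fallback above over-allocates and then aligns within the resulting block when the requested alignment exceeds `MI_ALIGNMENT_MAX`. A minimal sketch of that over-allocate-then-align idea (illustrative only, not the actual mimalloc code path; it uses plain `malloc` to stay self-contained):

```c
// Sketch: with `alignment - 1` extra bytes, any base pointer can be rounded up
// to an aligned address with `size` bytes still fitting inside the allocation.
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static void* align_up(void* base, size_t alignment) {   // alignment must be a power of two
  return (void*)(((uintptr_t)base + alignment - 1) & ~(uintptr_t)(alignment - 1));
}

int main(void) {
  const size_t size = 1000, alignment = 4096;
  const size_t oversize = size + alignment - 1;          // worst-case padding
  void* base = malloc(oversize);
  if (base == NULL) return 1;
  uint8_t* p = (uint8_t*)align_up(base, alignment);
  assert(((uintptr_t)p % alignment) == 0);               // p is aligned
  assert(p + size <= (uint8_t*)base + oversize);         // the object still fits
  free(base);                                            // free the original base, not p
  return 0;
}
```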
diff --git a/src/arena.c b/src/arena.c
index f952559e..2790ecd6 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -48,8 +48,8 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
 // Block info: bit 0 contains the `in_use` bit, the upper bits the
 // size in count of arena blocks.
 typedef uintptr_t mi_block_info_t;
-#define MI_ARENA_BLOCK_SIZE   (MI_SEGMENT_SIZE)        // 8MiB  (must be at least MI_SEGMENT_ALIGN)
-#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)  // 4MiB
+#define MI_ARENA_BLOCK_SIZE   (MI_SEGMENT_SIZE)        // 64MiB  (must be at least MI_SEGMENT_ALIGN)
+#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)  // 32MiB
 #define MI_MAX_ARENAS         (64)                     // not more than 126 (since we use 7 bits in the memid and an arena index + 1)
 
 // A memory arena descriptor
@@ -190,22 +190,23 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren
   return p;
 }
 
-static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large,
-                                                bool* is_pinned, bool* is_zero,
-                                                mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
-{
+// allocate from an arena with fallback to the OS
+static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large,
+                                                bool* is_pinned, bool* is_zero,
+                                                mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld )
+{
   MI_UNUSED_RELEASE(alignment);
   mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
-  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
   const size_t bcount = mi_block_count_of_size(size);
   if mi_likely(max_arena == 0) return NULL;
-  mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
+  mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE);
 
   size_t arena_index = mi_arena_id_index(req_arena_id);
   if (arena_index < MI_MAX_ARENAS) {
     // try a specific arena if requested
     mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);
-    if (arena != NULL &&
+    if ((arena != NULL) &&
        (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
        (*large || !arena->is_large))                              // large OS pages allowed, or arena is not large OS pages
     {
@@ -220,7 +221,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
     mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
     if (arena == NULL) break; // end reached
     if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
-        (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
+        (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
     {
       void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
       mi_assert_internal((uintptr_t)p % alignment == 0);
@@ -233,7 +234,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
     mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
     if (arena == NULL) break; // end reached
     if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local!
-        (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
+        (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
     {
       void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
       mi_assert_internal((uintptr_t)p % alignment == 0);
@@ -244,7 +245,6 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
   return NULL;
 }
 
-
 void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large,
                               bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
 {
@@ -255,7 +255,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
   *is_pinned = false;
 
   bool default_large = false;
-  if (large==NULL) large = &default_large; // ensure `large != NULL`
+  if (large == NULL) large = &default_large; // ensure `large != NULL`
   const int numa_node = _mi_os_numa_node(tld); // current numa node
 
   // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
@@ -272,7 +272,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
   *is_zero = true;
   *memid = MI_MEMID_OS;
   void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats);
-  if (p != NULL) *is_pinned = *large;
+  if (p != NULL) { *is_pinned = *large; }
   return p;
 }
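For reference, the assertion `size <= bcount * MI_ARENA_BLOCK_SIZE` holds because `mi_block_count_of_size` presumably rounds up to whole arena blocks. A tiny sketch of that ceiling division, using a stand-in block size since the real constant is derived from `MI_SEGMENT_SIZE`:

```c
// Sketch of the rounding-up block count assumed above.
#include <assert.h>
#include <stddef.h>

#define ARENA_BLOCK_SIZE ((size_t)64 * 1024 * 1024)   // stand-in for MI_ARENA_BLOCK_SIZE (64MiB)

static size_t block_count_of_size(size_t size) {
  return (size + ARENA_BLOCK_SIZE - 1) / ARENA_BLOCK_SIZE;   // round up to whole blocks
}

int main(void) {
  assert(block_count_of_size(1) == 1);
  assert(block_count_of_size(ARENA_BLOCK_SIZE) == 1);
  assert(block_count_of_size(ARENA_BLOCK_SIZE + 1) == 2);
  assert(1 <= block_count_of_size(1) * ARENA_BLOCK_SIZE);    // size <= bcount * block size
  return 0;
}
```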
diff --git a/src/os.c b/src/os.c
index 0f4ab3e1..90ac7c6c 100644
--- a/src/os.c
+++ b/src/os.c
@@ -839,29 +839,30 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar
 
 /* -----------------------------------------------------------
    OS aligned allocation with an offset. This is used
-   for large alignments > MI_SEGMENT_SIZE so we can align
-   the first page at an offset from the start of the segment.
-   As we may need to overallocate, we need to free such pointers
-   using `mi_free_aligned` to use the actual start of the
-   memory region.
+   for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc
+   page where the object can be aligned at an offset from the start of the segment.
+   As we may need to overallocate, we need to free such pointers using `mi_free_aligned`
+   to use the actual start of the memory region.
 ----------------------------------------------------------- */
-
 void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) {
   mi_assert(offset <= MI_SEGMENT_SIZE);
   mi_assert(offset <= size);
   mi_assert((alignment % _mi_os_page_size()) == 0);
   if (offset > MI_SEGMENT_SIZE) return NULL;
   if (offset == 0) {
+    // regular aligned allocation
    return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats);
   }
   else {
+    // overallocate to align at an offset
     const size_t extra = _mi_align_up(offset, alignment) - offset;
     const size_t oversize = size + extra;
     void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats);
     if (start == NULL) return NULL;
     void* p = (uint8_t*)start + extra;
     mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
+    // decommit the overallocation at the start
     if (commit && extra > _mi_os_page_size()) {
       _mi_os_decommit(start, extra, tld_stats);
     }
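The offset variant relies on `extra = _mi_align_up(offset, alignment) - offset` so that `p + offset` lands back on an aligned address while the over-allocation stays below one `alignment`. A standalone check of that invariant (mirroring, not reusing, the code above; the concrete numbers are arbitrary):

```c
// Verify: if `start` is alignment-aligned and extra = align_up(offset, alignment) - offset,
// then (start + extra) + offset is alignment-aligned again and extra < alignment.
#include <assert.h>
#include <stdint.h>

static uintptr_t align_up_sz(uintptr_t x, uintptr_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);     // power-of-two alignment
}

int main(void) {
  const uintptr_t alignment = 1 << 20;               // e.g. 1MiB
  const uintptr_t offset    = 12345;                 // offset <= size in the real call
  const uintptr_t start     = 16 * alignment;        // an aligned start, as returned by the aligned OS alloc
  const uintptr_t extra     = align_up_sz(offset, alignment) - offset;
  const uintptr_t p         = start + extra;
  assert((p + offset) % alignment == 0);             // the assertion the code makes
  assert(extra < alignment);                         // the over-allocation is bounded by the alignment
  return 0;
}
```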
diff --git a/src/page.c b/src/page.c
index ceee5f60..2fa03606 100644
--- a/src/page.c
+++ b/src/page.c
@@ -804,6 +804,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
 // Because huge pages contain just one block, and the segment contains
 // just that page, we always treat them as abandoned and any thread
 // that frees the block can free the whole page and segment directly.
+// Huge pages are also used if the requested alignment is very large (> MI_ALIGNMENT_MAX).
 static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
   size_t block_size = _mi_os_good_alloc_size(size);
   mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
@@ -861,6 +862,8 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme
 
 // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
 // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
+// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
+// very large requested alignments, in which case we use a huge segment.
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept
 {
   mi_assert_internal(heap != NULL);
diff --git a/src/region.c b/src/region.c
index ea376aa4..f069502f 100644
--- a/src/region.c
+++ b/src/region.c
@@ -65,8 +65,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offs
 #error "define the maximum heap space allowed for regions on this platform"
 #endif
 
-#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
-
 #define MI_REGION_MAX_BLOCKS   MI_BITMAP_FIELD_BITS
 #define MI_REGION_SIZE         (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB  (64MiB on 32 bits)
 #define MI_REGION_MAX          (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)  // 1024  (48 on 32 bits)
diff --git a/src/segment.c b/src/segment.c
index 22b9ccd0..85b84a6b 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -1580,9 +1580,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
   if mi_unlikely(page_alignment > MI_ALIGNMENT_MAX) {
     mi_assert_internal(_mi_is_power_of_two(page_alignment));
     mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE);
-    if (page_alignment < MI_SEGMENT_SIZE) {
-      page_alignment = MI_SEGMENT_SIZE;
-    }
+    if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; }
     page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld);
   }
   else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
diff --git a/test/test-api.c b/test/test-api.c
index e7f3a4ed..28d3856d 100644
--- a/test/test-api.c
+++ b/test/test-api.c
@@ -34,7 +34,7 @@ we therefore test the API over various inputs. Please add more tests :-)
 
 #include "mimalloc.h"
 // #include "mimalloc-internal.h"
-#include "mimalloc-types.h" // for MI_DEBUG
+#include "mimalloc-types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX
 #include "testhelper.h"
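A usage sketch of the behavior exercised by the test changes: alignments above the (now internal) `MI_ALIGNMENT_MAX` are served from a dedicated huge segment, so an arbitrarily large power-of-two alignment should succeed and the result stays freeable with a plain `mi_free`. The 64MiB value below is just an illustrative choice, not a limit.

```c
// Minimal caller-side check of very large aligned allocation via the public API.
#include <assert.h>
#include <stdint.h>
#include <mimalloc.h>

int main(void) {
  const size_t alignment = (size_t)64 * 1024 * 1024;   // larger than the old public MI_ALIGNMENT_MAX
  void* p = mi_malloc_aligned(1024, alignment);
  assert(p != NULL);                                    // may only fail under memory exhaustion
  assert(((uintptr_t)p % alignment) == 0);              // the returned block honors the alignment
  mi_free(p);                                           // a plain mi_free suffices
  return 0;
}
```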