From 711aad7a7538ff6f765f732fcc1fc0000a4b1ef7 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 7 Nov 2022 11:13:29 -0800
Subject: [PATCH 1/3] refactor arena allocation

---
 src/arena.c | 120 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 67 insertions(+), 53 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index 5aef95f7..0cc569ab 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -45,7 +45,6 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
   Arena allocation
 ----------------------------------------------------------- */
 
-#define MI_SEGMENT_ALIGN      MI_SEGMENT_SIZE
 #define MI_ARENA_BLOCK_SIZE   (4*MI_SEGMENT_ALIGN)     // 32MiB
 #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)  // 16MiB
 #define MI_MAX_ARENAS         (64)                     // not more than 126 (since we use 7 bits in the memid and an arena index + 1)
@@ -190,8 +189,63 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
   return p;
 }
 
+// allocate from an arena with fallback to the OS
+static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large,
+                                                bool* is_pinned, bool* is_zero,
+                                                mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld )
+{
+  MI_UNUSED_RELEASE(alignment);
+  mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
+  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+  const size_t bcount = mi_block_count_of_size(size);
+  if mi_likely(max_arena == 0) return NULL;
+  mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE);
+
+  size_t arena_index = mi_arena_id_index(req_arena_id);
+  if (arena_index < MI_MAX_ARENAS) {
+    // try a specific arena if requested
+    mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);
+    if ((arena != NULL) &&
+        (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
+        (*large || !arena->is_large))                              // large OS pages allowed, or arena is not large OS pages
+    {
+      void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+      mi_assert_internal((uintptr_t)p % alignment == 0);
+      if (p != NULL) return p;
+    }
+  }
+  else {
+    // try numa affine allocation
+    for (size_t i = 0; i < max_arena; i++) {
+      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+      if (arena == NULL) break; // end reached
+      if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
+          (*large || !arena->is_large))                              // large OS pages allowed, or arena is not large OS pages
+      {
+        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+        mi_assert_internal((uintptr_t)p % alignment == 0);
+        if (p != NULL) return p;
+      }
+    }
+
+    // try from another numa node instead..
+    for (size_t i = 0; i < max_arena; i++) {
+      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+      if (arena == NULL) break; // end reached
+      if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local!
+          (*large || !arena->is_large))                               // large OS pages allowed, or arena is not large OS pages
+      {
+        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+        mi_assert_internal((uintptr_t)p % alignment == 0);
+        if (p != NULL) return p;
+      }
+    }
+  }
+  return NULL;
+}
+
 void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero,
-                              mi_arena_id_t arena_id, size_t* memid, mi_os_tld_t* tld)
+                              mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
 {
   mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL);
   mi_assert_internal(size > 0);
@@ -199,71 +253,31 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
   *is_zero = false;
   *is_pinned = false;
 
-  // try to allocate in an arena if the alignment is small enough
-  // and the object is not too large or too small.
-  if (alignment <= MI_SEGMENT_ALIGN && align_offset == 0 &&
-      size >= MI_ARENA_MIN_OBJ_SIZE &&
-      mi_atomic_load_relaxed(&mi_arena_count) > 0)
-  {
-    const size_t bcount = mi_block_count_of_size(size);
-    const int numa_node = _mi_os_numa_node(tld); // current numa node
-    mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
+  bool default_large = false;
+  if (large == NULL) large = &default_large; // ensure `large != NULL`
+  const int numa_node = _mi_os_numa_node(tld); // current numa node
 
-    // try specific arena if so requested
-    size_t arena_index = mi_arena_id_index(arena_id);
-    if (arena_index < MI_MAX_ARENAS) {
-      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);
-      if ((arena != NULL) &&
-          (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
-          (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
-      {
-        void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, arena_id, memid, tld);
-        mi_assert_internal((uintptr_t)p % alignment == 0);
-        if (p != NULL) return p;
-      }
-    }
-
-    // try numa affine allocation
-    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
-      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
-      if (arena==NULL) break; // end reached
-      if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
-          (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
-      {
-        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, arena_id, memid, tld);
-        mi_assert_internal((uintptr_t)p % alignment == 0);
-        if (p != NULL) return p;
-      }
-    }
-    // try from another numa node instead..
-    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
-      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
-      if (arena==NULL) break; // end reached
-      if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
-          (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
-      {
-        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, arena_id, memid, tld);
-        mi_assert_internal((uintptr_t)p % alignment == 0);
-        if (p != NULL) return p;
-      }
-    }
+  // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
+  if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
+    void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+    if (p != NULL) return p;
   }
 
   // finally, fall back to the OS
-  if (mi_option_is_enabled(mi_option_limit_os_alloc)) {
+  if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) {
     errno = ENOMEM;
     return NULL;
   }
 
   *is_zero = true;
   *memid = MI_MEMID_OS;
   void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats);
-  if (p != NULL) *is_pinned = *large;
+  if (p != NULL) { *is_pinned = *large; }
   return p;
 }
 
-void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t arena_id, size_t* memid, mi_os_tld_t* tld)
+void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
 {
-  return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, arena_id, memid, tld);
+  return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
 }

From 1f12c3dd12646e24b1c378ff2e0b8542cb0e574f Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 7 Nov 2022 11:13:42 -0800
Subject: [PATCH 2/3] remove MI_ALIGNMENT_MAX and add comments

---
 include/mimalloc-types.h | 9 +++++----
 include/mimalloc.h       | 1 -
 src/alloc-aligned.c      | 4 ++--
 src/os.c                 | 13 +++++++------
 src/segment.c            | 4 +---
 test/test-api.c          | 2 +-
 6 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 79d04d2d..e54c4171 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -139,7 +139,8 @@ typedef int32_t mi_ssize_t;
 
 // Derived constants
 #define MI_SEGMENT_SIZE    (MI_ZU(1)<<MI_SEGMENT_SHIFT)
-#define MI_SEGMENT_MASK    ((uintptr_t)(MI_SEGMENT_SIZE - 1))
+#define MI_SEGMENT_ALIGN   (MI_SEGMENT_SIZE)
+#define MI_SEGMENT_MASK    ((uintptr_t)(MI_SEGMENT_ALIGN - 1))
 #define MI_SMALL_PAGE_SIZE  (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
 #define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
 #define MI_LARGE_PAGE_SIZE  (MI_ZU(1)<<MI_LARGE_PAGE_SHIFT)
@@ -157,12 +158,12 @@
 #if (MI_LARGE_OBJ_WSIZE_MAX >= 655360)
 #error "mimalloc internal: define more bins"
 #endif
-#if (MI_ALIGNMENT_MAX > MI_SEGMENT_SIZE/2)
-#error "mimalloc internal: the max aligned boundary is too large for the segment size"
-#endif
 
 // Used as a special value to encode block sizes in 32 bits.
 #define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX)
 
+// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
+#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
+
 // ------------------------------------------------------
 // Mimalloc pages contain allocated blocks
diff --git a/include/mimalloc.h b/include/mimalloc.h
index 17fd1c60..34644183 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -166,7 +166,6 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s
 // Note that `alignment` always follows `size` for consistency with unaligned
 // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`.
 // -------------------------------------------------------------------------------------
-#define MI_ALIGNMENT_MAX (2*1024*1024UL) // maximum supported alignment is 1MiB
 
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 3ce01f5c..86b0112b 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -36,9 +36,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
     // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
     if mi_unlikely(offset != 0) {
       // todo: cannot support offset alignment for very large alignments yet
-#if MI_DEBUG > 0
+      #if MI_DEBUG > 0
       _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
-#endif
+      #endif
       return NULL;
     }
     oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
diff --git a/src/os.c b/src/os.c
index 57b34a2c..c2d53e5b 100644
--- a/src/os.c
+++ b/src/os.c
@@ -842,29 +842,30 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar
 
 /* -----------------------------------------------------------
   OS aligned allocation with an offset. This is used
-  for large alignments > MI_SEGMENT_SIZE so we can align
-  the first page at an offset from the start of the segment.
-  As we may need to overallocate, we need to free such pointers
-  using `mi_free_aligned` to use the actual start of the
-  memory region.
+  for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc
+  page where the object can be aligned at an offset from the start of the segment.
+  As we may need to overallocate, we need to free such pointers using `mi_free_aligned`
+  to use the actual start of the memory region.
 ----------------------------------------------------------- */
-
 void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) {
   mi_assert(offset <= MI_SEGMENT_SIZE);
   mi_assert(offset <= size);
   mi_assert((alignment % _mi_os_page_size()) == 0);
   if (offset > MI_SEGMENT_SIZE) return NULL;
   if (offset == 0) {
+    // regular aligned allocation
     return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats);
   }
   else {
+    // overallocate to align at an offset
    const size_t extra = _mi_align_up(offset, alignment) - offset;
    const size_t oversize = size + extra;
    void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats);
    if (start == NULL) return NULL;
    void* p = (uint8_t*)start + extra;
    mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
+    // decommit the overallocation at the start
    if (commit && extra > _mi_os_page_size()) {
      _mi_os_decommit(start, extra, tld_stats);
    }
diff --git a/src/segment.c b/src/segment.c
index 5274aa1d..798aa756 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -1317,9 +1317,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
     mi_assert_internal(_mi_is_power_of_two(page_alignment));
     mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE);
     //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0);
-    if (page_alignment < MI_SEGMENT_SIZE) {
-      page_alignment = MI_SEGMENT_SIZE;
-    }
+    if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; }
     page = mi_segment_huge_page_alloc(block_size, page_alignment, tld, os_tld);
   }
   else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
diff --git a/test/test-api.c b/test/test-api.c
index 312b3f1b..a16ef381 100644
--- a/test/test-api.c
+++ b/test/test-api.c
@@ -34,7 +34,7 @@ we therefore test the API over various inputs. Please add more tests :-)
 
 #include "mimalloc.h"
 // #include "mimalloc-internal.h"
-#include "mimalloc-types.h" // for MI_DEBUG
+#include "mimalloc-types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX
 
 #include "testhelper.h"

From 2daec6c72f62597b8f87e28caecba028102c053a Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 7 Nov 2022 11:23:04 -0800
Subject: [PATCH 3/3] add more comments

---
 include/mimalloc-internal.h | 3 +++
 src/alloc-aligned.c         | 2 ++
 src/page.c                  | 3 +++
 src/region.c                | 2 --
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 4620fb72..53c0340a 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -425,6 +425,9 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {
 }
 
 // Segment that contains the pointer
+// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
+// and we need to align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
+// therefore we align one byte before `p`.
 static inline mi_segment_t* _mi_ptr_segment(const void* p) {
   mi_assert_internal(p != NULL);
   return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 86b0112b..db80baee 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -34,6 +34,8 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
   size_t oversize;
   if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
     // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
+    // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
+    // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
    if mi_unlikely(offset != 0) {
      // todo: cannot support offset alignment for very large alignments yet
      #if MI_DEBUG > 0
diff --git a/src/page.c b/src/page.c
index e359d5bb..7c3a30a8 100644
--- a/src/page.c
+++ b/src/page.c
@@ -795,6 +795,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
 // Because huge pages contain just one block, and the segment contains
 // just that page, we always treat them as abandoned and any thread
 // that frees the block can free the whole page and segment directly.
+// Huge pages are also used if the requested alignment is very large (> MI_ALIGNMENT_MAX).
 static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
   size_t block_size = _mi_os_good_alloc_size(size);
   mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
@@ -844,6 +845,8 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme
 
 // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
 // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
+// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
+// very large requested alignments in which case we use a huge segment.
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept
 {
   mi_assert_internal(heap != NULL);
diff --git a/src/region.c b/src/region.c
index ea376aa4..f069502f 100644
--- a/src/region.c
+++ b/src/region.c
@@ -65,8 +65,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offs
 #error "define the maximum heap space allowed for regions on this platform"
 #endif
 
-#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
-
 #define MI_REGION_MAX_BLOCKS  MI_BITMAP_FIELD_BITS
 #define MI_REGION_SIZE        (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB  (64MiB on 32 bits)
 #define MI_REGION_MAX         (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)  // 1024  (48 on 32 bits)
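
-----------------------------------------------------------
Notes on the series (reviewer sketches, not part of the patches)

Patch 3 documents the pointer-to-segment trick: a block with a very large
alignment may sit exactly on an N*MI_SEGMENT_SIZE boundary inside a huge
segment, so `_mi_ptr_segment` aligns `p - 1` down rather than `p` itself.
A minimal standalone sketch of that arithmetic (the 8MiB `SEG_SIZE` and the
addresses are illustrative assumptions, not mimalloc's actual values):

  #include <assert.h>
  #include <stdint.h>

  #define SEG_SIZE (1UL << 23)   // assumed segment size for the demo (8MiB)
  #define SEG_MASK (SEG_SIZE - 1)

  // align down from p-1, as _mi_ptr_segment does
  static uintptr_t segment_of(uintptr_t p) {
    return (p - 1) & ~SEG_MASK;
  }

  int main(void) {
    uintptr_t seg   = 3 * SEG_SIZE;    // huge segment start (aligned)
    uintptr_t block = seg + SEG_SIZE;  // block on the next segment boundary
    // aligning `block` itself down would yield `block`, missing the
    // segment info that lives SEG_SIZE bytes before it:
    assert((block & ~SEG_MASK) == block);
    // aligning one byte before the pointer finds the true segment start:
    assert(segment_of(block) == seg);
    // ordinary interior pointers are unaffected:
    assert(segment_of(seg + 100) == seg);
    return 0;
  }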
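Patch 2 comments the overallocation path in `_mi_os_alloc_aligned_offset`:
it reserves `extra = _mi_align_up(offset, alignment) - offset` leading bytes
so that `p + offset` lands on an alignment boundary, then decommits the
unused prefix. A small check of that arithmetic (`align_up` mirrors what the
patch calls `_mi_align_up`; the sizes are made up):

  #include <assert.h>
  #include <stddef.h>

  // round n up to a multiple of align (align must be a power of two)
  static size_t align_up(size_t n, size_t align) {
    return (n + align - 1) & ~(align - 1);
  }

  int main(void) {
    size_t alignment = 4096, offset = 1000;
    size_t extra = align_up(offset, alignment) - offset;  // 3096
    size_t start = (size_t)1 << 20;  // an alignment-aligned allocation start
    size_t p     = start + extra;    // the object begins after the extra bytes
    // p + offset == start + align_up(offset, alignment), which is aligned:
    assert((p + offset) % alignment == 0);
    return 0;
  }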
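Patch 2 also moves MI_ALIGNMENT_MAX into mimalloc-types.h as half of
MI_SEGMENT_SIZE, so it is no longer a hard API limit: per patch 3's comments,
requests above it go through `mi_huge_page_alloc` in a dedicated huge
segment. A usage sketch (the 8MiB figure assumes the 8MiB segment size
implied by the 32MiB arena-block comment in patch 1, and success still
depends on the OS granting the memory):

  #include <mimalloc.h>
  #include <assert.h>
  #include <stdint.h>

  int main(void) {
    size_t big_align = (size_t)8 * 1024 * 1024;  // > MI_ALIGNMENT_MAX (4MiB here)
    void* p = mi_malloc_aligned(100, big_align);
    if (p != NULL) {                             // may fail under memory pressure
      assert(((uintptr_t)p % big_align) == 0);
      mi_free(p);
    }
    return 0;
  }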