From 46afcbe06cd0000eeda5400fba7eb23453237b8c Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 29 Nov 2024 14:28:34 -0800 Subject: [PATCH] wip: further progress on segment removal; arena allocation --- include/mimalloc/internal.h | 7 +- include/mimalloc/types.h | 17 +- src/arena-page.c | 20 ++ src/arena.c | 368 ++++++++++++++++++++++++++---------- src/bitmap.c | 16 +- src/bitmap.h | 6 +- src/page-map.c | 8 +- src/page.c | 56 +++--- 8 files changed, 344 insertions(+), 154 deletions(-) create mode 100644 src/arena-page.c diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2713c0ac..d60b0c15 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -137,6 +137,9 @@ bool _mi_arena_contains(const void* p); void _mi_arenas_collect(bool force_purge, mi_stats_t* stats); void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); +mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment); +void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld); +void _mi_arena_page_free(mi_page_t* page, mi_tld_t* tld); void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid); void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size); @@ -181,6 +184,7 @@ void _mi_deferred_free(mi_heap_t* heap, bool force); void _mi_page_free_collect(mi_page_t* page,bool force); void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments +void _mi_page_init(mi_heap_t* heap, mi_page_t* page); size_t _mi_bin_size(uint8_t bin); // for stats uint8_t _mi_bin(size_t size); // for stats @@ -453,8 +457,7 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { // Page start static inline uint8_t* mi_page_start(const mi_page_t* page) { - mi_assert(sizeof(mi_page_t) <= MI_PAGE_INFO_SIZE); - return (uint8_t*)page + MI_PAGE_INFO_SIZE; + return page->page_start; } static inline uint8_t* mi_page_area(const mi_page_t* page, size_t* size) { diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 98664020..591cb603 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -127,8 +127,11 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ARENA_BLOCK_ALIGN (MI_ARENA_BLOCK_SIZE) #define MI_BITMAP_CHUNK_BITS (MI_ZU(1) << MI_BITMAP_CHUNK_BITS_SHIFT) -#define MI_ARENA_MIN_OBJ_SIZE MI_ARENA_BLOCK_SIZE -#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_CHUNK_BITS * MI_ARENA_BLOCK_SIZE) // for now, cannot cross chunk boundaries +#define MI_ARENA_MIN_OBJ_BLOCKS (1) +#define MI_ARENA_MAX_OBJ_BLOCKS (MI_BITMAP_CHUNK_BITS) // for now, cannot cross chunk boundaries + +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_BLOCKS * MI_ARENA_BLOCK_SIZE) +#define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_BLOCKS * MI_ARENA_BLOCK_SIZE) #define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE #define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bitmap) @@ -141,7 +144,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_BIN_COUNT (MI_BIN_FULL+1) -// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated orphan pages +// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in singleton pages #define MI_BLOCK_ALIGNMENT_MAX (MI_ARENA_BLOCK_ALIGN) // We never allocate more than PTRDIFF_MAX (see also ) @@ -279,7 +282,6 @@ typedef struct mi_subproc_s mi_subproc_t; // the owning heap `thread_delayed_free` list. This guarantees that pages // will be freed correctly even if only other threads free blocks. 
typedef struct mi_page_s { - mi_memid_t memid; // provenance of the page memory uint16_t capacity; // number of blocks committed (must be the first field for proper zero-initialisation) uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) @@ -293,6 +295,7 @@ typedef struct mi_page_s { uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type // padding size_t block_size; // size available in each block (always `>0`) + uint8_t* page_start; // start of the blocks #if (MI_ENCODE_FREELIST || MI_PADDING) uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary @@ -304,6 +307,7 @@ typedef struct mi_page_s { struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` + mi_memid_t memid; // provenance of the page memory } mi_page_t; @@ -312,7 +316,7 @@ typedef struct mi_page_s { // ------------------------------------------------------ #define MI_PAGE_ALIGN (64) -#define MI_PAGE_INFO_SIZE (MI_SIZE_SHIFT*MI_PAGE_ALIGN) // should be > sizeof(mi_page_t) +#define MI_PAGE_INFO_SIZE (2*MI_PAGE_ALIGN) // should be > sizeof(mi_page_t) // The max object size are checked to not waste more than 12.5% internally over the page sizes. // (Except for large pages since huge objects are allocated in 4MiB chunks) @@ -532,7 +536,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ struct mi_subproc_s { - _Atomic(size_t) abandoned_count; // count of abandoned pages for this sub-process + _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // count of abandoned pages for this sub-process _Atomic(size_t) abandoned_os_list_count; // count of abandoned pages in the os-list mi_lock_t abandoned_os_lock; // lock for the abandoned os pages list (outside of arena's) (this lock protect list operations) mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list @@ -562,6 +566,7 @@ struct mi_tld_s { mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) mi_subproc_t* subproc; // sub-process this thread belongs to. + size_t tseq; // thread sequence id mi_os_tld_t os; // os tld mi_stats_t stats; // statistics }; diff --git a/src/arena-page.c b/src/arena-page.c new file mode 100644 index 00000000..93d25dbf --- /dev/null +++ b/src/arena-page.c @@ -0,0 +1,20 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "bitmap.h" + + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + diff --git a/src/arena.c b/src/arena.c index 28ad61f1..c9f8400b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,6 +42,7 @@ typedef struct mi_arena_s { bool is_large; // memory area consists of large- or huge OS pages (always committed) mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. + mi_subproc_t* subproc; mi_bitmap_t blocks_free; // is the block free? mi_bitmap_t blocks_committed; // is the block committed? (i.e. accessible) @@ -99,6 +100,9 @@ mi_arena_t* mi_arena_from_index(size_t idx) { return mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[idx]); } +mi_arena_t* mi_arena_from_id(mi_arena_id_t id) { + return mi_arena_from_index(mi_arena_id_index(id)); +} /* ----------------------------------------------------------- @@ -164,14 +168,11 @@ bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, size_t* block Arena Allocation ----------------------------------------------------------- */ -static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool commit, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld) -{ - MI_UNUSED(arena_index); - mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); - +static mi_decl_noinline void* mi_arena_try_alloc_at( + mi_arena_t* arena, size_t needed_bcount, bool commit, size_t tseq, mi_memid_t* memid) +{ size_t block_index; - if (!mi_bitmap_try_find_and_clearN(&arena->blocks_free, tseq, needed_bcount, &block_index)) return NULL; + if (!mi_bitmap_try_find_and_clearN(&arena->blocks_free, needed_bcount, tseq, &block_index)) return NULL; // claimed it! 
void* p = mi_arena_block_start(arena, block_index); @@ -192,7 +193,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount, &all_already_committed); if (!all_already_committed) { bool commit_zero = false; - if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) { + if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, NULL)) { memid->initially_committed = false; } else { @@ -205,75 +206,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar memid->initially_committed = mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount); } + mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->blocks_free, block_index, needed_bcount)); + if (commit) { mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount)); } + mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_dirty, block_index, needed_bcount)); + // mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->blocks_purge, block_index, needed_bcount)); + return p; } -// allocate in a speficic arena -static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, - size_t size, size_t alignment, - bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld) -{ - mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN); - if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL; - - const size_t bcount = mi_block_count_of_size(size); - const size_t arena_index = mi_arena_id_index(arena_id); - mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); - mi_assert_internal(size <= mi_size_of_blocks(bcount)); - - // Check arena suitability - mi_arena_t* arena = mi_arena_from_index(arena_index); - if (arena == NULL) return NULL; - if (!allow_large && arena->is_large) return NULL; - if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; - if (req_arena_id == _mi_arena_id_none()) { // in not specific, check numa affinity - const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); - if (match_numa_node) { if (!numa_suitable) return NULL; } - else { if (numa_suitable) return NULL; } - } - - // try to allocate - void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, tseq, memid, tld); - mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment)); - return p; -} - - -// allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, - bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld) -{ - mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN); - if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL; - - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - if mi_likely(max_arena == 0) return NULL; - - if (req_arena_id != _mi_arena_id_none()) { - // try a specific arena if requested - if (mi_arena_id_index(req_arena_id) < max_arena) { - void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); - if (p != NULL) return p; - } - } - else { - // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, 
commit, allow_large, req_arena_id, tseq, memid, tld); - if (p != NULL) return p; - } - - // try from another numa node instead.. - if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already - for (size_t i = 0; i < max_arena; i++) { - void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); - if (p != NULL) return p; - } - } - } - return NULL; -} // try to reserve a fresh arena space static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id) @@ -323,56 +263,286 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) + + +/* ----------------------------------------------------------- + Arena iteration +----------------------------------------------------------- */ + +static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, mi_subproc_t* subproc, int numa_node, bool allow_large) { + if (subproc != NULL && arena->subproc != subproc) return false; + if (!allow_large && arena->is_large) return false; + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return false; + if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity + const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); + if (!numa_suitable) return false; + } + return true; +} + +#define MI_THREADS_PER_ARENA (16) + +#define mi_forall_arenas(req_arena_id, subproc, allow_large, tseq, var_arena_id, var_arena) \ + { \ + size_t _max_arena; \ + size_t _start; \ + if (req_arena_id == _mi_arena_id_none()) { \ + _max_arena = mi_atomic_load_relaxed(&mi_arena_count); \ + _start = (_max_arena <= 1 ? 
0 : (tseq / MI_THREADS_PER_ARENA) % _max_arena); \ + } \ + else { \ + _max_arena = 1; \ + _start = mi_arena_id_index(req_arena_id); \ + mi_assert_internal(mi_atomic_load_relaxed(&mi_arena_count) > _start); \ + } \ + for (size_t i = 0; i < _max_arena; i++) { \ + size_t _idx = i + _start; \ + if (_idx >= _max_arena) { _idx -= _max_arena; } \ + const mi_arena_id_t var_arena_id = mi_arena_id_create(_idx); \ + mi_arena_t* const var_arena = mi_arena_from_index(_idx); \ + if (mi_arena_is_suitable(var_arena,req_arena_id,subproc,-1 /* todo: numa node */,allow_large)) \ + { + +#define mi_forall_arenas_end() }}} + + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + +// allocate blocks from the arenas +static mi_decl_noinline void* mi_arena_try_find_free( + size_t block_count, size_t alignment, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) { - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - size_t tseq = _mi_thread_seq_id(); - *memid = _mi_memid_none(); + mi_assert_internal(block_count <= mi_block_count_of_size(MI_ARENA_MAX_OBJ_SIZE)); + mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN); + if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL; - const int numa_node = _mi_os_numa_node(tld); // current numa node + // search arena's + mi_subproc_t* const subproc = tld->subproc; + const size_t tseq = tld->tseq; + mi_forall_arenas(req_arena_id, subproc, allow_large, tseq, arena_id, arena) + { + void* p = mi_arena_try_alloc_at(arena, block_count, commit, tseq, memid); + if (p != NULL) return p; + } + mi_forall_arenas_end(); + return NULL; +} - // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed? 
- if (size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && alignment <= MI_ARENA_BLOCK_ALIGN && align_offset == 0) { - void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); +// Allocate blocks from the arena's -- potentially allocating a fresh arena +static mi_decl_noinline void* mi_arena_try_alloc( + size_t block_count, size_t alignment, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) +{ + mi_assert(block_count <= MI_ARENA_MAX_OBJ_BLOCKS); + mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN); + + void* p = mi_arena_try_find_free(block_count, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + + // otherwise, try to first eagerly reserve a new arena + if (req_arena_id == _mi_arena_id_none()) { + mi_arena_id_t arena_id = 0; + if (mi_arena_reserve(mi_size_of_blocks(block_count), allow_large, req_arena_id, &arena_id)) { + // and try allocate in there + mi_assert_internal(req_arena_id == _mi_arena_id_none()); + p = mi_arena_try_find_free(block_count, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; - - // otherwise, try to first eagerly reserve a new arena - if (req_arena_id == _mi_arena_id_none()) { - mi_arena_id_t arena_id = 0; - if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { - // and try allocate in there - mi_assert_internal(req_arena_id == _mi_arena_id_none()); - p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); - if (p != NULL) return p; - } - } } } +} +// Allocate from the OS (if allowed) +static void* mi_arena_os_alloc_aligned( + size_t size, size_t alignment, size_t align_offset, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) +{ // if we cannot use OS allocation, return NULL if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } - // finally, fall back to the OS if (align_offset > 0) { - return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, &tld->stats); } else { - return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, &tld->stats); } } + +// Allocate large sized memory +void* _mi_arena_alloc_aligned( + size_t size, size_t alignment, size_t align_offset, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) +{ + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + + // *memid = _mi_memid_none(); + // const int numa_node = _mi_os_numa_node(&tld->os); // current numa node + + // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? + req_arena_id == _mi_arena_id_none() && // not a specific arena? 
+ size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && // and not too small/large + alignment <= MI_ARENA_BLOCK_ALIGN && align_offset == 0) // and good alignment + { + const size_t block_count = mi_block_count_of_size(size); + void* p = mi_arena_try_alloc(block_count, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + } + + // fall back to the OS + return mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena_id, memid, tld); +} + void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld); } + +/* ----------------------------------------------------------- + Arena page allocation +----------------------------------------------------------- */ + +static mi_page_t* mi_arena_page_try_find_abandoned(size_t block_count, size_t block_size, mi_arena_id_t req_arena_id, mi_tld_t* tld) +{ + const size_t bin = _mi_bin(block_size); + mi_assert_internal(bin < MI_BIN_COUNT); + + // any abandoned in our size class? + mi_subproc_t* const subproc = tld->subproc; + if (mi_atomic_load_relaxed(&subproc->abandoned_count[bin]) == 0) return NULL; + + // search arena's + const bool allow_large = true; + size_t tseq = tld->tseq; + mi_forall_arenas(req_arena_id, subproc, allow_large, tseq, arena_id, arena) + { + size_t block_index; + if (mi_bitmap_try_find_and_clear(&arena->blocks_abandoned[bin], tseq, &block_index)) { + // found an abandoned page of the right size + mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]); + mi_page_t* page = (mi_page_t*)mi_arena_block_start(arena, block_index); + mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->blocks_free, block_index, block_count)); + mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, block_count)); + mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_dirty, block_index, block_count)); + mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->blocks_purge, block_index, block_count)); + mi_assert_internal(mi_page_block_size(page) == block_size); + mi_assert_internal(!mi_page_is_full(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + return page; + } + } + mi_forall_arenas_end(); + return false; +} + +static mi_page_t* mi_arena_page_alloc_fresh(size_t block_count, size_t block_size, mi_arena_id_t req_arena_id, mi_tld_t* tld) +{ + const bool allow_large = true; + const bool commit = true; + const size_t alignment = MI_ARENA_BLOCK_ALIGN; + + // try to allocate from free space in arena's + mi_memid_t memid; + mi_page_t* page = NULL; + if (_mi_option_get_fast(mi_option_disallow_arena_alloc)==0 && req_arena_id == _mi_arena_id_none()) { + page = (mi_page_t*)mi_arena_try_alloc(block_count, alignment, commit, allow_large, req_arena_id, &memid, tld); + } + + // otherwise fall back to the OS + if (page == NULL) { + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_blocks(block_count), alignment, 0 /* align offset */, commit, allow_large, req_arena_id, &memid, tld); + } + + if (page == NULL) return NULL; + + // claimed free blocks: initialize the page partly + _mi_memzero_aligned(page, sizeof(*page)); + mi_assert(MI_PAGE_INFO_SIZE >= _mi_align_up(sizeof(*page), MI_PAGE_ALIGN)); + const size_t reserved = (mi_size_of_blocks(block_count) - MI_PAGE_INFO_SIZE) / block_size; + mi_assert_internal(reserved > 0 && reserved < UINT16_MAX); + 
page->reserved = reserved; + page->page_start = (uint8_t*)page + MI_PAGE_INFO_SIZE; + page->block_size = block_size; + page->memid = memid; + page->free_is_zero = memid.initially_zero; + if (block_size > 0 && _mi_is_power_of_two(block_size)) { + page->block_size_shift = (uint8_t)mi_ctz(block_size); + } + else { + page->block_size_shift = 0; + } + + mi_assert_internal(mi_page_block_size(page) == block_size); + mi_assert_internal(mi_page_is_abandoned(page)); + return page; +} + +// block_count: arena block count for the page +// block size : page block size +static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t block_count, size_t block_size) { + const size_t req_arena_id = heap->arena_id; + mi_tld_t* const tld = heap->tld; + + // 1. look for an abandoned page + mi_page_t* page = mi_arena_page_try_find_abandoned(block_count, block_size, req_arena_id, tld); + if (page != NULL) { + _mi_page_reclaim(heap,page); + return page; + } + + // 2. find a free block, potentially allocating a new arena + page = mi_arena_page_alloc_fresh(block_count, block_size, req_arena_id, tld); + if (page != NULL) { + _mi_page_init(heap, page); + return page; + } + + return NULL; +} + + +static mi_page_t* mi_singleton_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment) { + _mi_error_message(EINVAL, "singleton page is not yet implemented\n"); + return NULL; +} + + +mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment) { + mi_page_t* page; + if mi_unlikely(page_alignment > MI_BLOCK_ALIGNMENT_MAX) { + mi_assert_internal(_mi_is_power_of_two(page_alignment)); + page = mi_singleton_page_alloc(heap, block_size, page_alignment); + } + else if (block_size <= MI_SMALL_MAX_OBJ_SIZE) { + page = mi_arena_page_allocN(heap, mi_block_count_of_size(MI_SMALL_PAGE_SIZE), block_size); + } + else if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) { + page = mi_arena_page_allocN(heap, mi_block_count_of_size(MI_MEDIUM_PAGE_SIZE), block_size); + } + else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) { + page = mi_arena_page_allocN(heap, mi_block_count_of_size(MI_LARGE_PAGE_SIZE), block_size); + } + else { + page = mi_singleton_page_alloc(heap, block_size, page_alignment); + } + // mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc); + return page; +} + + /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ diff --git a/src/bitmap.c b/src/bitmap.c index 463d74c7..9faa9ae9 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -512,9 +512,9 @@ bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) } -#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \ +#define mi_bitmap_forall_set_chunks(bitmap,tseq,decl_chunk_idx) \ { size_t _set_idx; \ - size_t _start = start % MI_BFIELD_BITS; \ + size_t _start = tseq % MI_BFIELD_BITS; \ mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \ while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \ decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS; @@ -530,8 +530,8 @@ bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) // and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`. // The low `MI_BFIELD_BITS` of start are used to set the start point of the search // (to reduce thread contention). 
-bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) { - mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) +bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) { + mi_bitmap_forall_set_chunks(bitmap,tseq,size_t chunk_idx) { size_t cidx; if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) { @@ -554,8 +554,8 @@ bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t star // Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero. // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`. -bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) { - mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) +bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx ) { + mi_bitmap_forall_set_chunks(bitmap,tseq,size_t chunk_idx) { size_t cidx; if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) { @@ -576,11 +576,11 @@ bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pi // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all. // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. -bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) { +bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx ) { // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger // TODO: allow spanning across chunk boundaries if (n == 0 || n > MI_BFIELD_BITS) return false; - mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) + mi_bitmap_forall_set_chunks(bitmap,tseq,size_t chunk_idx) { size_t cidx; if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) { diff --git a/src/bitmap.h b/src/bitmap.h index 198a2902..fcadc213 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -79,14 +79,14 @@ mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, si // and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`. // The low `MI_BFIELD_BITS` of start are used to set the start point of the search // (to reduce thread contention). -mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start); +mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero. // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`. -mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ); +mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx ); // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all. // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. 
-mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ); +mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx ); #endif // MI_XBITMAP_H diff --git a/src/page-map.c b/src/page-map.c index d3fcef79..cb527886 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -32,9 +32,13 @@ static bool mi_page_map_init(void) { return false; } if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) { - _mi_warning_message("the page map was committed on-demand but not zero initialized!\n"); + _mi_warning_message("the page map was committed but not zero initialized!\n"); _mi_memzero_aligned(_mi_page_map, page_map_size); } + // commit the first part so NULL pointers get resolved without an access violation + if (!mi_page_map_all_committed) { + _mi_os_commit(_mi_page_map, _mi_os_page_size(), NULL, NULL); + } return true; } @@ -72,7 +76,7 @@ void _mi_page_map_register(mi_page_t* page) { // set the offsets for (int i = 0; i < block_count; i++) { mi_assert_internal(i < 128); - _mi_page_map[idx + i] = (int8_t)(-i-1); + _mi_page_map[idx + i] = (signed char)(-i-1); } } diff --git a/src/page.c b/src/page.c index a00ff615..fa006085 100644 --- a/src/page.c +++ b/src/page.c @@ -119,7 +119,7 @@ bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(page->keys[0] != 0); #endif if (mi_page_heap(page)!=NULL) { - mi_assert_internal(!_mi_process_is_initialized || page->thread_id == mi_page_heap(page)->thread_id || page->thread_id==0); + mi_assert_internal(!_mi_process_is_initialized || mi_page_thread_id(page) == mi_page_heap(page)->thread_id || mi_page_thread_id(page)==0); { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); @@ -249,19 +249,22 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { + mi_page_set_heap(page, heap); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + _mi_page_free_collect(page, false); // ensure used count is up to date + mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); - #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - #endif - + // TODO: push on full queue immediately if it is full? 
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } + + // allocate a fresh page from a segment static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { #if !MI_HUGE_PAGE_ABANDON @@ -269,16 +272,12 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_assert_internal(mi_heap_contains_queue(heap, pq)); mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size); #endif - mi_page_t* page = _mi_heap_page_alloc(heap, block_size, page_alignment); + mi_page_t* page = _mi_arena_page_alloc(heap, block_size, page_alignment); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; } mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); - // a fresh page was found, initialize it - const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc - mi_assert_internal(full_block_size >= block_size); - mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); @@ -389,7 +388,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { // and abandon it mi_assert_internal(mi_page_is_abandoned(page)); - _mi_arena_page_abandon(page,&pheap->tld); + _mi_arena_page_abandon(page, pheap->tld); } // force abandon a page @@ -432,7 +431,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { // and free it mi_page_set_heap(page,NULL); - _mi_arena_page_free(page, force, &pheap->tld); + _mi_arena_page_free(page, pheap->tld); } #define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE @@ -617,7 +616,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) 
-static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); @@ -629,7 +628,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) size_t page_size; //uint8_t* page_start = mi_page_area(page, &page_size); - mi_stat_counter_increase(tld->stats.pages_extended, 1); + mi_heap_stat_counter_increase(heap, pages_extended, 1); // calculate the extend count const size_t bsize = mi_page_block_size(page); @@ -651,48 +650,37 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, bsize, extend, &tld->stats ); + mi_page_free_list_extend(page, bsize, extend, &heap->tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats); + mi_page_free_list_extend_secure(heap, page, bsize, extend, &heap->tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(tld->stats.page_committed, extend * bsize); + mi_heap_stat_increase(heap, page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); } -// Initialize a fresh page -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) { +// Initialize a fresh page (that is already partially initialized) +void _mi_page_init(mi_heap_t* heap, mi_page_t* page) { mi_assert(page != NULL); - mi_assert_internal(block_size > 0); - // set fields mi_page_set_heap(page, heap); - page->block_size = block_size; size_t page_size; uint8_t* page_start = mi_page_area(page, &page_size); mi_track_mem_noaccess(page_start,page_size); - mi_assert_internal(page_size / block_size < (1L<<16)); - page->reserved = (uint16_t)(page_size / block_size); + mi_assert_internal(page_size / mi_page_block_size(page) < (1L<<16)); mi_assert_internal(page->reserved > 0); #if (MI_PADDING || MI_ENCODE_FREELIST) page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif - page->free_is_zero = page->memid.initially_zero; #if MI_DEBUG>2 if (page->memid.initially_zero) { mi_track_mem_defined(page->page_start, page_size); mi_assert_expensive(mi_mem_is_zero(page_start, page_size)); } #endif - if (block_size > 0 && _mi_is_power_of_two(block_size)) { - page->block_size_shift = (uint8_t)mi_ctz(block_size); - } - else { - page->block_size_shift = 0; - } - + mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); @@ -705,11 +693,11 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->keys[0] != 0); mi_assert_internal(page->keys[1] != 0); #endif - mi_assert_internal(page->block_size_shift == 0 || (block_size == ((size_t)1 << page->block_size_shift))); + mi_assert_internal(page->block_size_shift == 0 || (mi_page_block_size(page) == ((size_t)1 << page->block_size_shift))); mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,tld); + mi_page_extend_free(heap,page); mi_assert(mi_page_immediate_available(page)); }
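
Note (illustrative, not part of the patch): the `mi_forall_arenas` macro added in src/arena.c starts each thread's arena search at an index derived from its thread sequence id (`tld->tseq`), so that roughly `MI_THREADS_PER_ARENA` (16) threads share a preferred arena while the wrap-around loop still visits every arena once. The standalone sketch below models only that start/wrap computation; the `demo_*` names are hypothetical and exist only for this example — they are not part of the mimalloc API.

/* ----------------------------------------------------------------------------
  Standalone model of the tseq-based round-robin arena selection used by
  `mi_forall_arenas` above (sketch only; `demo_*` names are illustrative).
-----------------------------------------------------------------------------*/
#include <stddef.h>
#include <stdio.h>

#define DEMO_THREADS_PER_ARENA  16   // mirrors MI_THREADS_PER_ARENA in the patch

// visit all `arena_count` arenas once, starting at a tseq-derived index
static void demo_forall_arenas(size_t arena_count, size_t tseq) {
  const size_t start = (arena_count <= 1 ? 0 : (tseq / DEMO_THREADS_PER_ARENA) % arena_count);
  for (size_t i = 0; i < arena_count; i++) {
    size_t idx = i + start;
    if (idx >= arena_count) { idx -= arena_count; }   // wrap around, no modulo in the loop body
    printf("tseq %zu visits arena %zu\n", tseq, idx);
  }
}

int main(void) {
  // with 4 arenas: tseq 0..15 prefer arena 0, tseq 16..31 prefer arena 1, ...
  demo_forall_arenas(4, 0);
  demo_forall_arenas(4, 17);
  return 0;
}

For example, with 4 arenas a thread with tseq 17 searches arenas in the order 1, 2, 3, 0; spreading the preferred starting arena across threads this way reduces contention on the per-arena `blocks_free` bitmaps while still falling back to every other arena before reserving a new one or going to the OS.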