From 441d4fed9fd302bb2a2b326bc8b134c8a15982bb Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 29 Nov 2024 10:40:18 -0800 Subject: [PATCH] wip: further progress on removing segments --- CMakeLists.txt | 1 + ide/vs2022/mimalloc.vcxproj | 12 +- ide/vs2022/mimalloc.vcxproj.filters | 13 +- include/mimalloc/bits.h | 6 + include/mimalloc/internal.h | 183 +++--- include/mimalloc/types.h | 271 +++----- src/alloc.c | 2 +- src/{xarena.c => arena-old.c} | 875 ++------------------------ src/arena.c | 871 ++++++++++++++++++++++++-- src/bitmap-old.c | 419 +++++++++++++ src/bitmap-old.h | 110 ++++ src/bitmap.c | 940 +++++++++++++++++----------- src/bitmap.h | 154 ++--- src/free.c | 118 ++-- src/heap.c | 5 +- src/os.c | 55 +- src/page-map.c | 90 +++ src/page.c | 67 +- src/static.c | 3 +- src/xbitmap.c | 599 ------------------ src/xbitmap.h | 94 --- 21 files changed, 2396 insertions(+), 2492 deletions(-) rename src/{xarena.c => arena-old.c} (53%) create mode 100644 src/bitmap-old.c create mode 100644 src/bitmap-old.h create mode 100644 src/page-map.c delete mode 100644 src/xbitmap.c delete mode 100644 src/xbitmap.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fc1808e..5cb05840 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,7 @@ set(mi_sources src/options.c src/os.c src/page.c + src/page-map.c src/random.c src/segment.c src/segment-map.c diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 138acf39..3dd7326f 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -214,12 +214,7 @@ - - true - true - true - true - + false @@ -232,6 +227,7 @@ + true @@ -248,12 +244,8 @@ - - - - diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters index 48958be1..2eed7e90 100644 --- a/ide/vs2022/mimalloc.vcxproj.filters +++ b/ide/vs2022/mimalloc.vcxproj.filters @@ -43,12 +43,6 @@ Sources - - Sources - - - Sources - Sources @@ -58,13 +52,10 @@ Sources - + Sources - - Sources - - + Sources diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index 642f0f9c..ad7ea3e6 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -100,6 +100,10 @@ typedef int32_t mi_ssize_t; #define __BMI1__ 1 #endif +// Define big endian if needed +// #define MI_BIG_ENDIAN 1 + + /* -------------------------------------------------------------------------------- Builtin's -------------------------------------------------------------------------------- */ @@ -310,4 +314,6 @@ static inline size_t mi_rotl(size_t x, size_t r) { #endif } + + #endif // MI_BITS_H diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b997099e..2713c0ac 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -108,6 +108,7 @@ size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); +size_t _mi_os_virtual_address_bits(void); bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); @@ -136,12 +137,11 @@ bool _mi_arena_contains(const void* p); void _mi_arenas_collect(bool force_purge, mi_stats_t* stats); void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); -bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment); -void _mi_arena_segment_mark_abandoned(mi_segment_t* segment); void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid); void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size); +/* typedef struct 
mi_arena_field_cursor_s { // abstract struct size_t os_list_count; // max entries to visit in the OS abandoned list size_t start; // start arena idx (may need to be wrapped) @@ -154,27 +154,12 @@ typedef struct mi_arena_field_cursor_s { // abstract struct void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current); mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous); void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current); +*/ -// "segment-map.c" -void _mi_segment_map_allocated_at(const mi_segment_t* segment); -void _mi_segment_map_freed_at(const mi_segment_t* segment); +// "page-map.c" +void _mi_page_map_register(mi_page_t* page); +void _mi_page_map_unregister(mi_page_t* page); -// "segment.c" -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); -void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); -void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); - -#if MI_HUGE_PAGE_ABANDON -void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); -#else -void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); -#endif - -void _mi_segments_collect(bool force, mi_segments_tld_t* tld); -void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); -bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment); -bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; @@ -226,7 +211,7 @@ void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, siz void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); -void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration +// void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); // "libc.c" @@ -338,8 +323,8 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { // Align a pointer upwards -static inline void* mi_align_up_ptr(void* p, size_t alignment) { - return (void*)_mi_align_up((uintptr_t)p, alignment); +static inline uint8_t* _mi_align_up_ptr(void* p, size_t alignment) { + return (uint8_t*)_mi_align_up((uintptr_t)p, alignment); } @@ -445,68 +430,44 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si return heap->pages_free_direct[idx]; } -// Segment that contains the pointer -// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), -// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; -// therefore we align one byte before `p`. -// We check for NULL afterwards on 64-bit systems to improve codegen for `mi_free`. 
-static inline mi_segment_t* _mi_ptr_segment(const void* p) { - mi_segment_t* const segment = (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); - #if MI_INTPTR_SIZE <= 4 - return (p==NULL ? NULL : segment); - #else - return ((intptr_t)segment <= 0 ? NULL : segment); + +extern signed char* _mi_page_map; + +#define MI_PAGE_PTR_INVALID ((mi_page_t*)(1)) + +static inline mi_page_t* _mi_ptr_page(const void* p) { + const uintptr_t up = ((uintptr_t)p) >> MI_ARENA_BLOCK_SHIFT; + const ptrdiff_t ofs = _mi_page_map[up]; + #if MI_DEBUG + if mi_unlikely(ofs==0) return MI_PAGE_PTR_INVALID; #endif + return (mi_page_t*)((up + ofs - 1) << MI_ARENA_BLOCK_SHIFT); } -// Segment belonging to a page -static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { - mi_assert_internal(page!=NULL); - mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]); - return segment; -} -// used internally -static inline size_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { - // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages - ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && (size_t)diff <= MI_SEGMENT_SIZE /* for huge alignment it can be equal */); - size_t idx = (size_t)diff >> segment->page_shift; - mi_assert_internal(idx < segment->capacity); - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); - return idx; -} - -// Get the page containing the pointer -static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - size_t idx = _mi_segment_page_idx_of(segment, p); - return &((mi_segment_t*)segment)->pages[idx]; -} - -// Quick page start for initialized pages -static inline uint8_t* mi_page_start(const mi_page_t* page) { - mi_assert_internal(page->page_start != NULL); - mi_assert_expensive(_mi_segment_page_start(_mi_page_segment(page),page,NULL) == page->page_start); - return page->page_start; -} - -// Get the page containing the pointer -static inline mi_page_t* _mi_ptr_page(void* p) { - mi_assert_internal(p!=NULL); - return _mi_segment_page_of(_mi_ptr_segment(p), p); -} - -// Get the block size of a page (special case for huge objects) +// Get the block size of a page static inline size_t mi_page_block_size(const mi_page_t* page) { mi_assert_internal(page->block_size > 0); return page->block_size; } -static inline bool mi_page_is_huge(const mi_page_t* page) { - mi_assert_internal((page->is_huge && _mi_page_segment(page)->page_kind == MI_PAGE_HUGE) || - (!page->is_huge && _mi_page_segment(page)->page_kind != MI_PAGE_HUGE)); - return page->is_huge; +// Page start +static inline uint8_t* mi_page_start(const mi_page_t* page) { + mi_assert(sizeof(mi_page_t) <= MI_PAGE_INFO_SIZE); + return (uint8_t*)page + MI_PAGE_INFO_SIZE; +} + +static inline uint8_t* mi_page_area(const mi_page_t* page, size_t* size) { + if (size) { *size = mi_page_block_size(page) * page->reserved; } + return mi_page_start(page); +} + +static inline bool mi_page_is_in_arena(const mi_page_t* page) { + return (page->memid.memkind == MI_MEM_ARENA); +} + +static inline bool mi_page_is_singleton(const mi_page_t* page) { + return (page->reserved == 1); } // Get the usable block size of a page without fixed padding. 
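// --- Illustrative sketch (editorial note, not part of the patch) --------------
// The new `_mi_ptr_page` above replaces the old `_mi_ptr_segment` mask trick
// with a flat lookup table: `_mi_page_map[p >> MI_ARENA_BLOCK_SHIFT]` holds a
// signed byte offset `ofs` (0 = not registered) and the owning page header sits
// at arena block `up + ofs - 1`. A minimal, self-contained round trip for the
// simplest case -- a page occupying a single arena block -- assuming a
// hypothetical 64 KiB block size; multi-block registration lives in the new
// `page-map.c`, which is not shown in full in this hunk:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define SKETCH_BLOCK_SHIFT 16  // assumed: 64 KiB arena blocks

static void* sketch_ptr_page(const signed char* page_map, const void* p) {
  const uintptr_t up  = ((uintptr_t)p) >> SKETCH_BLOCK_SHIFT;
  const ptrdiff_t ofs = page_map[up];
  if (ofs == 0) return NULL;                                // not registered
  return (void*)((up + ofs - 1) << SKETCH_BLOCK_SHIFT);     // start of the page header
}

static void sketch_roundtrip(signed char* page_map, uintptr_t page_addr) {
  const uintptr_t block = page_addr >> SKETCH_BLOCK_SHIFT;
  page_map[block] = 1;                                      // ofs 1 => page starts in this block
  void* interior = (void*)(page_addr + 128);                // any pointer inside the block
  assert(sketch_ptr_page(page_map, interior) == (void*)page_addr);
}
// ------------------------------------------------------------------------------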
@@ -515,11 +476,6 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } -// size of a segment -static inline size_t mi_segment_size(mi_segment_t* segment) { - return segment->segment_size; -} - // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3); @@ -534,10 +490,20 @@ static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap)); } +static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) { + return mi_atomic_load_relaxed(&page->xthread_id); +} + static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); mi_atomic_store_release(&page->xheap,(uintptr_t)heap); - if (heap != NULL) { page->heap_tag = heap->tag; } + if (heap != NULL) { + page->heap_tag = heap->tag; + mi_atomic_store_release(&page->xthread_id, heap->thread_id); + } + else { + mi_atomic_store_release(&page->xthread_id,0); + } } // Thread free flag helpers @@ -576,6 +542,21 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) { return (page->free != NULL); } + +// is the page not yet used up to its reserved space? +static inline bool mi_page_is_expandable(const mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_internal(page->capacity <= page->reserved); + return (page->capacity < page->reserved); +} + + +static inline bool mi_page_is_full(mi_page_t* page) { + bool full = (page->reserved == page->used); + mi_assert_internal(!full || page->free == NULL); + return full; +} + // is more than 7/8th of a page in use? static inline bool mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; @@ -583,6 +564,15 @@ static inline bool mi_page_mostly_used(const mi_page_t* page) { return (page->reserved - page->used <= frac); } +static inline bool mi_page_is_abandoned(mi_page_t* page) { + return (mi_page_thread_id(page) == 0); +} + +static inline bool mi_page_is_huge(mi_page_t* page) { + return (page->block_size > MI_LARGE_MAX_OBJ_SIZE); +} + + static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; } @@ -667,17 +657,8 @@ We also pass a separate `null` value to be used as `NULL` or otherwise `(k2<<= 655360) -#error "mimalloc internal: define more bins" -#endif - -// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`) -#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX) - -// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments -#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) +// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated orphan pages +#define MI_BLOCK_ALIGNMENT_MAX (MI_ARENA_BLOCK_ALIGN) // We never allocate more than PTRDIFF_MAX (see also ) -#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX +#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX + + +// --------------------------------------------------------------- +// a memory id tracks the provenance of arena/OS allocated memory +// --------------------------------------------------------------- + +// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. 
+typedef enum mi_memkind_e { + MI_MEM_NONE, // not allocated + MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) + MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) + MI_MEM_OS, // allocated from the OS + MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory) + MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) + MI_MEM_ARENA // allocated from an arena (the usual case) +} mi_memkind_t; + +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + +typedef struct mi_memid_os_info { + void* base; // actual base address of the block (used for offset aligned allocations) + size_t alignment; // alignment at allocation +} mi_memid_os_info_t; + +typedef struct mi_memid_arena_info { + size_t block_index; // index in the arena + mi_arena_id_t id; // arena id (>= 1) + bool is_exclusive; // this arena can only be used for specific arena allocations +} mi_memid_arena_info_t; + +typedef struct mi_memid_s { + union { + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA + } mem; + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages) + bool initially_committed;// `true` if the memory was originally allocated as committed + bool initially_zero; // `true` if the memory was originally zero initialized + mi_memkind_t memkind; +} mi_memid_t; + // ------------------------------------------------------ // Mimalloc pages contain allocated blocks @@ -223,6 +248,10 @@ typedef union mi_page_flags_s { // We use the bottom 2 bits of the pointer for mi_delayed_t flags typedef uintptr_t mi_thread_free_t; +// Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython) +typedef struct mi_subproc_s mi_subproc_t; + + // A page contains blocks of one specific size (`block_size`). // Each page has three list of free blocks: // `free` for blocks that can be allocated, @@ -242,8 +271,6 @@ typedef uintptr_t mi_thread_free_t; // Notes: // - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc` // - Using `uint16_t` does not seem to slow things down -// - The size is 10 words on 64-bit which helps the page index calculations -// (and 12 words on 32-bit, and encoded free lists add 2 words) // - `xthread_free` uses the bottom bits as a delayed-free flags to optimize // concurrent frees where only the first concurrent free adds to the owning // heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`). @@ -252,15 +279,8 @@ typedef uintptr_t mi_thread_free_t; // the owning heap `thread_delayed_free` list. This guarantees that pages // will be freed correctly even if only other threads free blocks. 
typedef struct mi_page_s { - // "owned" by the segment - uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` - uint8_t segment_in_use:1; // `true` if the segment allocated this page - uint8_t is_committed:1; // `true` if the page virtual memory is committed - uint8_t is_zero_init:1; // `true` if the page was initially zero initialized - uint8_t is_huge:1; // `true` if the page is in a huge segment - - // layout like this to optimize access in `mi_malloc` and `mi_free` - uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` + mi_memid_t memid; // provenance of the page memory + uint16_t capacity; // number of blocks committed (must be the first field for proper zero-initialisation) uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized @@ -272,120 +292,54 @@ typedef struct mi_page_s { uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type // padding - size_t block_size; // size available in each block (always `>0`) - uint8_t* page_start; // start of the page area containing the blocks + size_t block_size; // size available in each block (always `>0`) #if (MI_ENCODE_FREELIST || MI_PADDING) uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary #endif _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads - _Atomic(uintptr_t) xheap; + _Atomic(uintptr_t) xheap; // heap this threads belong to. + _Atomic(mi_threadid_t)xthread_id; // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned) struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` - - #if MI_INTPTR_SIZE==4 // pad to 12 words on 32-bit - void* padding[1]; - #endif } mi_page_t; +// ------------------------------------------------------ +// Object sizes +// ------------------------------------------------------ + +#define MI_PAGE_ALIGN (64) +#define MI_PAGE_INFO_SIZE (MI_SIZE_SHIFT*MI_PAGE_ALIGN) // should be > sizeof(mi_page_t) + +// The max object size are checked to not waste more than 12.5% internally over the page sizes. 
+// (Except for large pages since huge objects are allocated in 4MiB chunks) +#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4) // ~16KiB +#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4) // ~128KiB +#define MI_LARGE_MAX_OBJ_SIZE ((MI_LARGE_PAGE_SIZE-MI_PAGE_INFO_SIZE)/2) // ~2MiB +#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE) + + +#if (MI_LARGE_MAX_OBJ_WSIZE >= 655360) +#error "mimalloc internal: define more bins" +#endif + // ------------------------------------------------------ -// Mimalloc segments contain mimalloc pages +// Page kinds // ------------------------------------------------------ typedef enum mi_page_kind_e { - MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment - MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment - MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment - MI_PAGE_HUGE // a huge page is a single page in a segment of variable size (but still 2MiB aligned) + MI_PAGE_SMALL, // small blocks go into 64KiB pages + MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages + MI_PAGE_LARGE, // larger blocks go into 4MiB pages + MI_PAGE_SINGLETON // page containing a single block. // used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an aligment `> MI_BLOCK_ALIGNMENT_MAX`. } mi_page_kind_t; -// --------------------------------------------------------------- -// a memory id tracks the provenance of arena/OS allocated memory -// --------------------------------------------------------------- - -// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. -typedef enum mi_memkind_e { - MI_MEM_NONE, // not allocated - MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) - MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) - MI_MEM_OS, // allocated from the OS - MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory) - MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) - MI_MEM_ARENA // allocated from an arena (the usual case) -} mi_memkind_t; - -static inline bool mi_memkind_is_os(mi_memkind_t memkind) { - return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); -} - -typedef struct mi_memid_os_info { - void* base; // actual base address of the block (used for offset aligned allocations) - size_t alignment; // alignment at allocation -} mi_memid_os_info_t; - -typedef struct mi_memid_arena_info { - size_t block_index; // index in the arena - mi_arena_id_t id; // arena id (>= 1) - bool is_exclusive; // this arena can only be used for specific arena allocations -} mi_memid_arena_info_t; - -typedef struct mi_memid_s { - union { - mi_memid_os_info_t os; // only used for MI_MEM_OS - mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA - } mem; - bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. 
when allocated using large (2Mib) or huge (1GiB) OS pages) - bool initially_committed;// `true` if the memory was originally allocated as committed - bool initially_zero; // `true` if the memory was originally zero initialized - mi_memkind_t memkind; -} mi_memid_t; - - -// --------------------------------------------------------------- -// Segments contain mimalloc pages -// --------------------------------------------------------------- -typedef struct mi_subproc_s mi_subproc_t; - -// Segments are large allocated memory blocks (2MiB on 64 bit) from the OS. -// Inside segments we allocated fixed size _pages_ that contain blocks. -typedef struct mi_segment_s { - // constant fields - mi_memid_t memid; // memory id to track provenance - bool allow_decommit; - bool allow_purge; - size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` - mi_subproc_t* subproc; // segment belongs to sub process - - // segment fields - struct mi_segment_s* next; // must be the first (non-constant) segment field -- see `segment.c:segment_init` - struct mi_segment_s* prev; - bool was_reclaimed; // true if it was reclaimed (used to limit reclaim-on-free reclamation) - bool dont_free; // can be temporarily true to ensure the segment is not freed - - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t abandoned_visits; // count how often this segment is visited for reclaiming (to force reclaim if it is too long) - - size_t used; // count of pages in use (`used <= capacity`) - size_t capacity; // count of available pages (`#free + used`) - size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` - - struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arena's, and only if `mi_option_visit_abandoned` is enabled - struct mi_segment_s* abandoned_os_prev; - - // layout like this to optimize access in `mi_free` - _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment - size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
- mi_page_kind_t page_kind; // kind of pages: small, medium, large, or huge - mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages -} mi_segment_t; - // ------------------------------------------------------ // Heaps @@ -522,21 +476,18 @@ typedef struct mi_stat_counter_s { } mi_stat_counter_t; typedef struct mi_stats_s { - mi_stat_count_t segments; mi_stat_count_t pages; mi_stat_count_t reserved; mi_stat_count_t committed; mi_stat_count_t reset; mi_stat_count_t purged; mi_stat_count_t page_committed; - mi_stat_count_t segments_abandoned; mi_stat_count_t pages_abandoned; mi_stat_count_t threads; mi_stat_count_t normal; mi_stat_count_t huge; mi_stat_count_t giant; mi_stat_count_t malloc; - mi_stat_count_t segments_cache; mi_stat_counter_t pages_extended; mi_stat_counter_t mmap_calls; mi_stat_counter_t commit_calls; @@ -581,12 +532,12 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ struct mi_subproc_s { - _Atomic(size_t) abandoned_count; // count of abandoned segments for this sub-process - _Atomic(size_t) abandoned_os_list_count; // count of abandoned segments in the os-list - mi_lock_t abandoned_os_lock; // lock for the abandoned os segment list (outside of arena's) (this lock protect list operations) + _Atomic(size_t) abandoned_count; // count of abandoned pages for this sub-process + _Atomic(size_t) abandoned_os_list_count; // count of abandoned pages in the os-list + mi_lock_t abandoned_os_lock; // lock for the abandoned os pages list (outside of arena's) (this lock protect list operations) mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list - mi_segment_t* abandoned_os_list; // doubly-linked list of abandoned segments outside of arena's (in OS allocated memory) - mi_segment_t* abandoned_os_list_tail; // the tail-end of the list + mi_page_t* abandoned_os_list; // doubly-linked list of abandoned pages outside of arena's (in OS allocated memory) + mi_page_t* abandoned_os_list_tail; // the tail-end of the list mi_memid_t memid; // provenance of this memory block }; @@ -597,11 +548,6 @@ struct mi_subproc_s { // Milliseconds as in `int64_t` to avoid overflows typedef int64_t mi_msecs_t; -// Queue of segments -typedef struct mi_segment_queue_s { - mi_segment_t* first; - mi_segment_t* last; -} mi_segment_queue_t; // OS thread local data typedef struct mi_os_tld_s { @@ -609,28 +555,13 @@ typedef struct mi_os_tld_s { mi_stats_t* stats; // points to tld stats } mi_os_tld_t; -// Segments thread local data -typedef struct mi_segments_tld_s { - mi_segment_queue_t small_free; // queue of segments with free small pages - mi_segment_queue_t medium_free; // queue of segments with free medium pages - mi_page_queue_t pages_purge; // queue of freed pages that are delay purged - size_t count; // current number of segments; - size_t peak_count; // peak number of segments - size_t current_size; // current size of all segments - size_t peak_size; // peak size of all segments - size_t reclaim_count;// number of reclaimed (abandoned) segments - mi_subproc_t* subproc; // sub-process this thread belongs to. - mi_stats_t* stats; // points to tld stats - mi_os_tld_t* os; // points to os tld -} mi_segments_tld_t; - // Thread local data struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count bool recurse; // true if deferred was called; used to prevent infinite recursion. 
mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) - mi_segments_tld_t segments; // segment tld + mi_subproc_t* subproc; // sub-process this thread belongs to. mi_os_tld_t os; // os tld mi_stats_t stats; // statistics }; diff --git a/src/alloc.c b/src/alloc.c index a093f108..00f6d1a4 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -82,7 +82,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ #if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + if (bsize <= MI_LARGE_MAX_OBJ_SIZE) { mi_heap_stat_increase(heap, normal, bsize); mi_heap_stat_counter_increase(heap, normal_count, 1); #if (MI_STAT>1) diff --git a/src/xarena.c b/src/arena-old.c similarity index 53% rename from src/xarena.c rename to src/arena-old.c index 42943f84..8ca5aaf3 100644 --- a/src/xarena.c +++ b/src/arena-old.c @@ -21,834 +21,46 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo #include "mimalloc.h" #include "mimalloc/internal.h" -#include "xbitmap.h" +#include "mimalloc/atomic.h" +#include "bitmap.h" /* ----------------------------------------------------------- Arena allocation ----------------------------------------------------------- */ -#define MI_ARENA_BLOCK_SIZE (MI_SMALL_PAGE_SIZE) // 64KiB -#define MI_ARENA_BLOCK_ALIGN (MI_ARENA_BLOCK_SIZE) // 64KiB -#define MI_ARENA_BIN_COUNT (MI_BIN_COUNT) - -#define MI_ARENA_MIN_OBJ_SIZE MI_ARENA_BLOCK_SIZE -#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_CHUNK_BITS * MI_ARENA_BLOCK_SIZE) // for now, cannot cross chunk boundaries - // A memory arena descriptor typedef struct mi_arena_s { mi_arena_id_t id; // arena id; 0 for non-specific mi_memid_t memid; // memid of the memory area - // _Atomic(uint8_t*) start; // the start of the memory area - // size_t meta_size; // size of the arena structure itself (including its bitmaps) - // mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) + _Atomic(uint8_t*)start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) + size_t meta_size; // size of the arena structure itself (including its bitmaps) + mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node bool exclusive; // only allow allocations if specifically for this arena bool is_large; // memory area consists of large- or huge OS pages (always committed) mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited - _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. - - mi_bitmap_t blocks_free; // is the block free? - mi_bitmap_t blocks_committed; // is the block committed? (i.e. accessible) - mi_bitmap_t blocks_purge; // can the block be purged? (block in purge => block in free) - mi_bitmap_t blocks_dirty; // is the block potentially non-zero? 
- mi_bitmap_t blocks_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page) - // the full queue contains abandoned full pages + _Atomic(size_t)search_idx; // optimization to start the search for free blocks + _Atomic(mi_msecs_t)purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) + mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) + mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) + // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields. } mi_arena_t; -#define MI_MAX_ARENAS (1024) // Limited for now (and takes up .bss) + +#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB +#define MI_MAX_ARENAS (132) // Limited as the reservation exponentially increases (and takes up .bss) // The available arenas static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 - -/* ----------------------------------------------------------- - Arena id's - id = arena_index + 1 ------------------------------------------------------------ */ - -size_t mi_arena_id_index(mi_arena_id_t id) { - return (size_t)(id <= 0 ? 
MI_MAX_ARENAS : id - 1); -} - -static mi_arena_id_t mi_arena_id_create(size_t arena_index) { - mi_assert_internal(arena_index < MI_MAX_ARENAS); - return (int)arena_index + 1; -} - -mi_arena_id_t _mi_arena_id_none(void) { - return 0; -} - -static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { - return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || - (arena_id == req_arena_id)); -} - -bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { - if (memid.memkind == MI_MEM_ARENA) { - return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); - } - else { - return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id); - } -} - -size_t mi_arena_get_count(void) { - return mi_atomic_load_relaxed(&mi_arena_count); -} - -mi_arena_t* mi_arena_from_index(size_t idx) { - mi_assert_internal(idx < mi_arena_get_count()); - return mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[idx]); -} - - - -/* ----------------------------------------------------------- - Util ------------------------------------------------------------ */ - -// Blocks needed for a given byte size -static size_t mi_block_count_of_size(size_t size) { - return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); -} - -// Byte size of a number of blocks -static size_t mi_size_of_blocks(size_t bcount) { - return (bcount * MI_ARENA_BLOCK_SIZE); -} - -// Size of an arena -static size_t mi_arena_size(mi_arena_t* arena) { - return mi_size_of_blocks(arena->block_count); -} - -static size_t mi_arena_info_blocks(void) { - const size_t os_page_size = _mi_os_page_size(); - const size_t info_size = _mi_align_up(sizeof(mi_arena_t), os_page_size) + os_page_size; // + guard page - const size_t info_blocks = mi_block_count_of_size(info_size); - return info_blocks; -} - - -// Start of the arena memory area -static uint8_t* mi_arena_start(mi_arena_t* arena) { - return ((uint8_t*)arena); -} - -// Start of a block -void* mi_arena_block_start(mi_arena_t* arena, size_t block_index) { - return (mi_arena_start(arena) + mi_size_of_blocks(block_index)); -} - -// Arena area -void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { - if (size != NULL) *size = 0; - const size_t arena_index = mi_arena_id_index(arena_id); - if (arena_index >= MI_MAX_ARENAS) return NULL; - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); - if (arena == NULL) return NULL; - if (size != NULL) { *size = mi_size_of_blocks(arena->block_count); } - return mi_arena_start(arena); -} - - -// Create an arena memid -static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, size_t block_index) { - mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA); - memid.mem.arena.id = id; - memid.mem.arena.block_index = block_index; - memid.mem.arena.is_exclusive = is_exclusive; - return memid; -} - -// returns if the arena is exclusive -bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, size_t* block_index) { - mi_assert_internal(memid.memkind == MI_MEM_ARENA); - *arena_index = mi_arena_id_index(memid.mem.arena.id); - *block_index = memid.mem.arena.block_index; - return memid.mem.arena.is_exclusive; -} - - - -/* ----------------------------------------------------------- - Arena Allocation ------------------------------------------------------------ */ - -static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool commit, size_t 
tseq, mi_memid_t* memid, mi_os_tld_t* tld) -{ - MI_UNUSED(arena_index); - mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); - - size_t block_index; - if (!mi_bitmap_try_find_and_clearN(&arena->blocks_free, tseq, needed_bcount, &block_index)) return NULL; - - // claimed it! - void* p = mi_arena_block_start(arena, block_index); - *memid = mi_memid_create_arena(arena->id, arena->exclusive, block_index); - memid->is_pinned = arena->memid.is_pinned; - - // set the dirty bits - if (arena->memid.initially_zero) { - memid->initially_zero = mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_dirty, block_index, needed_bcount, NULL); - } - - // set commit state - if (commit) { - // commit requested, but the range may not be committed as a whole: ensure it is committed now - memid->initially_committed = true; - - bool all_already_committed; - mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount, &all_already_committed); - if (!all_already_committed) { - bool commit_zero = false; - if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) { - memid->initially_committed = false; - } - else { - if (commit_zero) { memid->initially_zero = true; } - } - } - } - else { - // no need to commit, but check if already fully committed - memid->initially_committed = mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount); - } - - return p; -} - -// allocate in a speficic arena -static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, - size_t size, size_t alignment, - bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld) -{ - mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN); - if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL; - - const size_t bcount = mi_block_count_of_size(size); - const size_t arena_index = mi_arena_id_index(arena_id); - mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); - mi_assert_internal(size <= mi_size_of_blocks(bcount)); - - // Check arena suitability - mi_arena_t* arena = mi_arena_from_index(arena_index); - if (arena == NULL) return NULL; - if (!allow_large && arena->is_large) return NULL; - if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; - if (req_arena_id == _mi_arena_id_none()) { // in not specific, check numa affinity - const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); - if (match_numa_node) { if (!numa_suitable) return NULL; } - else { if (numa_suitable) return NULL; } - } - - // try to allocate - void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, tseq, memid, tld); - mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment)); - return p; -} - - -// allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, - bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld) -{ - mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN); - if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL; - - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - if mi_likely(max_arena == 0) return NULL; - - if (req_arena_id != _mi_arena_id_none()) { - // try a specific arena if requested - if (mi_arena_id_index(req_arena_id) < max_arena) { - void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, 
memid, tld); - if (p != NULL) return p; - } - } - else { - // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); - if (p != NULL) return p; - } - - // try from another numa node instead.. - if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already - for (size_t i = 0; i < max_arena; i++) { - void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); - if (p != NULL) return p; - } - } - } - return NULL; -} - -// try to reserve a fresh arena space -static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id) -{ - if (_mi_preloading()) return false; // use OS only while pre loading - if (req_arena_id != _mi_arena_id_none()) return false; - - const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); - if (arena_count > (MI_MAX_ARENAS - 4)) return false; - - // calc reserve - size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); - if (arena_reserve == 0) return false; - - if (!_mi_os_has_virtual_reserve()) { - arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example) - } - arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); - - if (arena_count >= 8 && arena_count <= 128) { - // scale up the arena sizes exponentially every 8 entries (128 entries get to 589TiB) - const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16); - size_t reserve = 0; - if (!mi_mul_overflow(multiplier, arena_reserve, &reserve)) { - arena_reserve = reserve; - } - } - - // check arena bounds - const size_t min_reserve = mi_size_of_blocks(mi_arena_info_blocks() + 1); - const size_t max_reserve = MI_BITMAP_MAX_BITS * MI_ARENA_BLOCK_SIZE; - if (arena_reserve < min_reserve) { - arena_reserve = min_reserve; - } - else if (arena_reserve > max_reserve) { - arena_reserve = max_reserve; - } - - if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size - - // commit eagerly? - bool arena_commit = false; - if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } - else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } - - return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0); -} - - -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - size_t tseq = _mi_thread_seq_id(); - *memid = _mi_memid_none(); - - const int numa_node = _mi_os_numa_node(tld); // current numa node - - // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed? 
- if (size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && alignment <= MI_ARENA_BLOCK_ALIGN && align_offset == 0) { - void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); - if (p != NULL) return p; - - // otherwise, try to first eagerly reserve a new arena - if (req_arena_id == _mi_arena_id_none()) { - mi_arena_id_t arena_id = 0; - if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { - // and try allocate in there - mi_assert_internal(req_arena_id == _mi_arena_id_none()); - p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); - if (p != NULL) return p; - } - } - } - } - - // if we cannot use OS allocation, return NULL - if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) { - errno = ENOMEM; - return NULL; - } - - // finally, fall back to the OS - if (align_offset > 0) { - return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); - } - else { - return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); - } -} - -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) -{ - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld); -} - - -/* ----------------------------------------------------------- - Arena free ------------------------------------------------------------ */ -static void mi_arena_schedule_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats); -static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats); - -void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); - mi_assert_internal(committed_size <= size); - if (p==NULL) return; - if (size==0) return; - const bool all_committed = (committed_size == size); - - // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) 
- mi_track_mem_undefined(p, size); - - if (mi_memkind_is_os(memid.memkind)) { - // was a direct OS allocation, pass through - if (!all_committed && committed_size > 0) { - // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) - _mi_stat_decrease(&_mi_stats_main.committed, committed_size); - } - _mi_os_free(p, size, memid, stats); - } - else if (memid.memkind == MI_MEM_ARENA) { - // allocated in an arena - size_t arena_idx; - size_t block_idx; - mi_arena_memid_indices(memid, &arena_idx, &block_idx); - mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); - mi_assert_internal(arena != NULL); - const size_t blocks = mi_block_count_of_size(size); - - // checks - if (arena == NULL) { - _mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); - return; - } - mi_assert_internal(block_idx < arena->block_count); - mi_assert_internal(block_idx > mi_arena_info_blocks()); - if (block_idx <= mi_arena_info_blocks() || block_idx > arena->block_count) { - _mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); - return; - } - - // potentially decommit - if (arena->memid.is_pinned || arena->memid.initially_committed) { - mi_assert_internal(all_committed); - } - else { - if (!all_committed) { - // mark the entire range as no longer committed (so we recommit the full range when re-using) - mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_committed, blocks, block_idx, NULL); - mi_track_mem_noaccess(p, size); - if (committed_size > 0) { - // if partially committed, adjust the committed stats (is it will be recommitted when re-using) - // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. - _mi_stat_decrease(&_mi_stats_main.committed, committed_size); - } - // note: if not all committed, it may be that the purge will reset/decommit the entire range - // that contains already decommitted parts. Since purge consistently uses reset or decommit that - // works (as we should never reset decommitted parts). - } - // (delay) purge the entire range - mi_arena_schedule_purge(arena, block_idx, blocks, stats); - } - - // and make it available to others again - bool all_inuse = mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_free, block_idx, blocks, NULL); - if (!all_inuse) { - _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size); - return; - }; - } - else { - // arena was none, external, or static; nothing to do - mi_assert_internal(memid.memkind < MI_MEM_OS); - } - - // purge expired decommits - mi_arenas_try_purge(false, false, stats); -} - -// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` -// for dynamic libraries that are unloaded and need to release all their allocated memory. 
-static void mi_arenas_unsafe_destroy(void) { - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - size_t new_max_arena = 0; - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); - if (arena != NULL) { - mi_lock_done(&arena->abandoned_visit_lock); - if (mi_memkind_is_os(arena->memid.memkind)) { - mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); - _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid, &_mi_stats_main); - } - } - } - - // try to lower the max arena. - size_t expected = max_arena; - mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); -} - -// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired -void _mi_arenas_collect(bool force_purge, mi_stats_t* stats) { - mi_arenas_try_purge(force_purge, force_purge /* visit all? */, stats); -} - -// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` -// for dynamic libraries that are unloaded and need to release all their allocated memory. -void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { - mi_arenas_unsafe_destroy(); - _mi_arenas_collect(true /* force purge */, stats); // purge non-owned arenas -} - -// Is a pointer inside any of our arenas? -bool _mi_arena_contains(const void* p) { - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_blocks(arena->block_count) > (const uint8_t*)p) { - return true; - } - } - return false; -} - - -/* ----------------------------------------------------------- - Add an arena. ------------------------------------------------------------ */ - -static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { - mi_assert_internal(arena != NULL); - mi_assert_internal(arena->block_count > 0); - if (arena_id != NULL) { *arena_id = -1; } - - size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); - if (i >= MI_MAX_ARENAS) { - mi_atomic_decrement_acq_rel(&mi_arena_count); - return false; - } - _mi_stat_counter_increase(&stats->arena_count,1); - arena->id = mi_arena_id_create(i); - mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); - if (arena_id != NULL) { *arena_id = arena->id; } - return true; -} - -static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept -{ - mi_assert(!is_large || memid.initially_committed && memid.is_pinned); - mi_assert(_mi_is_aligned(start,MI_ARENA_BLOCK_SIZE)); - mi_assert(start!=NULL); - if (start==NULL) return false; - if (!_mi_is_aligned(start,MI_ARENA_BLOCK_SIZE)) { - // todo: use alignment in memid to align to blocksize first? 
- _mi_warning_message("cannot use OS memory since it is not aligned to %zu KiB (address %p)", MI_ARENA_BLOCK_SIZE/MI_KiB, start); - return false; - } - - if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); } - - const size_t info_blocks = mi_arena_info_blocks(); - const size_t bcount = size / MI_ARENA_BLOCK_SIZE; // divide down - if (bcount < info_blocks+1) { - _mi_warning_message("cannot use OS memory since it is not large enough (size %zu KiB, minimum required is %zu KiB)", size/MI_KiB, mi_size_of_blocks(info_blocks+1)/MI_KiB); - return false; - } - if (bcount > MI_BITMAP_MAX_BITS) { - // todo: allow larger areas (either by splitting it up in arena's or having larger arena's) - _mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_blocks(MI_BITMAP_MAX_BITS)/MI_MiB); - return false; - } - mi_arena_t* arena = (mi_arena_t*)start; - - // commit & zero if needed - bool is_zero = memid.initially_zero; - if (!memid.initially_committed) { - _mi_os_commit(arena, mi_size_of_blocks(info_blocks), &is_zero, &_mi_stats_main); - } - if (!is_zero) { - _mi_memzero(arena, mi_size_of_blocks(info_blocks)); - } - - // init - arena->id = _mi_arena_id_none(); - arena->memid = memid; - arena->exclusive = exclusive; - arena->block_count = bcount; - arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) - arena->is_large = is_large; - arena->purge_expire = 0; - mi_lock_init(&arena->abandoned_visit_lock); - - // init bitmaps - mi_bitmap_init(&arena->blocks_free,true); - mi_bitmap_init(&arena->blocks_committed,true); - mi_bitmap_init(&arena->blocks_dirty,true); - mi_bitmap_init(&arena->blocks_purge,true); - for( int i = 0; i < MI_ARENA_BIN_COUNT; i++) { - mi_bitmap_init(&arena->blocks_abandoned[i],true); - } - - // reserve our meta info (and reserve blocks outside the memory area) - mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->blocks_free, info_blocks /* start */, arena->block_count - info_blocks); - if (memid.initially_committed) { - mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->blocks_committed, 0, arena->block_count); - } - else { - mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, 0, info_blocks, NULL); - } - mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_dirty, 0, info_blocks, NULL); - - return mi_arena_add(arena, arena_id, &_mi_stats_main); -} - - -bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); - memid.initially_committed = is_committed; - memid.initially_zero = is_zero; - memid.is_pinned = is_large; - return mi_manage_os_memory_ex2(start, size, is_large, numa_node, exclusive, memid, arena_id); -} - -// Reserve a range of regular OS memory -int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - if (arena_id != NULL) *arena_id = _mi_arena_id_none(); - size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block - mi_memid_t memid; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main); - if (start == NULL) return ENOMEM; - const bool is_large = memid.is_pinned; // todo: use separate is_large field? 
- if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { - _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); - _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); - return ENOMEM; - } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : ""); - return 0; -} - - -// Manage a range of regular OS memory -bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { - return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); -} - -// Reserve a range of regular OS memory -int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { - return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); -} - - -/* ----------------------------------------------------------- - Debugging ------------------------------------------------------------ */ -static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf) { - size_t bit_set_count = 0; - for (int bit = 0; bit < MI_BFIELD_BITS; bit++) { - bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0); - if (is_set) bit_set_count++; - buf[bit] = (is_set ? 'x' : '.'); - } - return bit_set_count; -} - -static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_t* bitmap) { - _mi_verbose_message("%s%s:\n", prefix, header); - size_t bit_count = 0; - size_t bit_set_count = 0; - for (int i = 0; i < MI_BFIELD_BITS && bit_count < block_count; i++) { - char buf[MI_BITMAP_CHUNK_BITS + 1]; - mi_bitmap_chunk_t* chunk = &bitmap->chunks[i]; - for (int j = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) { - if (bit_count < block_count) { - bit_set_count += mi_debug_show_bfield(chunk->bfields[j], buf + j*MI_BFIELD_BITS); - } - else { - _mi_memset(buf + j*MI_BFIELD_BITS, ' ', MI_BFIELD_BITS); - } - bit_count += MI_BFIELD_BITS; - } - buf[MI_BITMAP_CHUNK_BITS] = 0; - _mi_verbose_message("%s %s\n", prefix, buf); - } - _mi_verbose_message("%s total ('x'): %zu\n", prefix, bit_set_count); - return bit_set_count; -} - -void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept { - MI_UNUSED(show_abandoned); - size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); - size_t free_total = 0; - size_t block_total = 0; - //size_t abandoned_total = 0; - size_t purge_total = 0; - for (size_t i = 0; i < max_arenas; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; - block_total += arena->block_count; - _mi_verbose_message("arena %zu: %zu blocks%s\n", i, arena->block_count, (arena->memid.is_pinned ? 
", pinned" : "")); - if (show_inuse) { - free_total += mi_debug_show_bitmap(" ", "free blocks", arena->block_count, &arena->blocks_free); - } - mi_debug_show_bitmap(" ", "committed blocks", arena->block_count, &arena->blocks_committed); - // todo: abandoned blocks - if (show_purge) { - purge_total += mi_debug_show_bitmap(" ", "purgeable blocks", arena->block_count, &arena->blocks_purge); - } - } - if (show_inuse) _mi_verbose_message("total inuse blocks : %zu\n", block_total - free_total); - // if (show_abandoned) _mi_verbose_message("total abandoned blocks: %zu\n", abandoned_total); - if (show_purge) _mi_verbose_message("total purgeable blocks: %zu\n", purge_total); -} - - -/* ----------------------------------------------------------- - Reserve a huge page arena. ------------------------------------------------------------ */ -// reserve at a specific numa node -int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - if (arena_id != NULL) *arena_id = -1; - if (pages==0) return 0; - if (numa_node < -1) numa_node = -1; - if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); - size_t hsize = 0; - size_t pages_reserved = 0; - mi_memid_t memid; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); - if (p==NULL || pages_reserved==0) { - _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); - return ENOMEM; - } - _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - - if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { - _mi_os_free(p, hsize, memid, &_mi_stats_main); - return ENOMEM; - } - return 0; -} - -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { - return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL); -} - -// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) -int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { - if (pages == 0) return 0; - - // pages per numa node - size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); - if (numa_count <= 0) numa_count = 1; - const size_t pages_per = pages / numa_count; - const size_t pages_mod = pages % numa_count; - const size_t timeout_per = (timeout_msecs==0 ? 
0 : (timeout_msecs / numa_count) + 50); - - // reserve evenly among numa nodes - for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { - size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); - if (err) return err; - if (pages < node_pages) { - pages = 0; - } - else { - pages -= node_pages; - } - } - - return 0; -} - -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { - MI_UNUSED(max_secs); - _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); - if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); - if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; - return err; -} - - - -/* ----------------------------------------------------------- - Arena purge ------------------------------------------------------------ */ - -static long mi_arena_purge_delay(void) { - // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay - return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult)); -} - -// reset or decommit in an arena and update the committed/decommit bitmaps -// assumes we own the area (i.e. blocks_free is claimed by us) -static void mi_arena_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats) { - mi_assert_internal(!arena->memid.is_pinned); - const size_t size = mi_size_of_blocks(blocks); - void* const p = mi_arena_block_start(arena, block_idx); - bool needs_recommit; - if (mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_idx, blocks)) { - // all blocks are committed, we can purge freely - needs_recommit = _mi_os_purge(p, size, stats); - } - else { - // some blocks are not committed -- this can happen when a partially committed block is freed - // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge - // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), - // and also undo the decommit stats (as it was already adjusted) - mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); - if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); } - } - - // clear the purged blocks - mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_purge, blocks, block_idx, NULL); - - // update committed bitmap - if (needs_recommit) { - mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_committed, blocks, block_idx, NULL); - } -} - - -// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. -// Note: assumes we (still) own the area as we may purge immediately -static void mi_arena_schedule_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats) { - const long delay = mi_arena_purge_delay(); - if (delay < 0) return; // is purging allowed at all? 
- - if (_mi_preloading() || delay == 0) { - // decommit directly - mi_arena_purge(arena, block_idx, blocks, stats); - } - else { - // schedule decommit - _mi_error_message(EFAULT, "purging not yet implemented\n"); - } -} - - -static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats) { - if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled - - const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); - if (max_arena == 0) return; - - _mi_error_message(EFAULT, "purging not yet implemented\n"); - MI_UNUSED(stats); - MI_UNUSED(visit_all); - MI_UNUSED(force); -} - - -#if 0 - #define MI_IN_ARENA_C #include "arena-abandon.c" #undef MI_IN_ARENA_C @@ -904,12 +116,12 @@ static size_t mi_block_count_of_size(size_t size) { return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } -static size_t mi_size_of_blocks(size_t bcount) { +static size_t mi_arena_block_size(size_t bcount) { return (bcount * MI_ARENA_BLOCK_SIZE); } static size_t mi_arena_size(mi_arena_t* arena) { - return mi_size_of_blocks(arena->block_count); + return mi_arena_block_size(arena->block_count); } static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { @@ -995,7 +207,7 @@ void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) { } void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { - return (arena->start + mi_size_of_blocks(mi_bitmap_index_bit(bindex))); + return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex))); } @@ -1004,7 +216,7 @@ void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { ----------------------------------------------------------- */ // claim the `blocks_inuse` bits -static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, size_t block_idx, mi_stats_t* stats) +static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) { size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx, stats)) { @@ -1056,7 +268,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { bool commit_zero = false; - if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) { + if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) { memid->initially_committed = false; } else { @@ -1081,7 +293,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no const size_t bcount = mi_block_count_of_size(size); const size_t arena_index = mi_arena_id_index(arena_id); mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); - mi_assert_internal(size <= mi_size_of_blocks(bcount)); + mi_assert_internal(size <= mi_arena_block_size(bcount)); // Check arena suitability mi_arena_t* arena = mi_arena_from_index(arena_index); @@ -1227,7 +439,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (arena_index >= MI_MAX_ARENAS) return NULL; mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; - if (size != NULL) { *size = mi_size_of_blocks(arena->block_count); } + if (size != NULL) { *size = 
mi_arena_block_size(arena->block_count); } return arena->start; } @@ -1247,7 +459,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); mi_assert_internal(!arena->memid.is_pinned); - const size_t size = mi_size_of_blocks(blocks); + const size_t size = mi_arena_block_size(blocks); void* const p = mi_arena_block_start(arena, bitmap_idx); bool needs_recommit; if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { @@ -1299,25 +511,25 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t // purge a range of blocks // return true if the full range was purged. // assumes we own the area (i.e. blocks_in_use is claimed by us) -static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startseqx, size_t bitlen, size_t purge, mi_stats_t* stats) { - const size_t endidx = startseqx + bitlen; - size_t bitseqx = startseqx; +static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) { + const size_t endidx = startidx + bitlen; + size_t bitidx = startidx; bool all_purged = false; - while (bitseqx < endidx) { + while (bitidx < endidx) { // count consecutive ones in the purge mask size_t count = 0; - while (bitseqx + count < endidx && (purge & ((size_t)1 << (bitseqx + count))) != 0) { + while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) { count++; } if (count > 0) { // found range to be purged - const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitseqx); + const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx); mi_arena_purge(arena, range_idx, count, stats); if (count == bitlen) { all_purged = true; } } - bitseqx += (count+1); // +1 to skip the zero bit (or end) + bitidx += (count+1); // +1 to skip the zero bit (or end) } return all_purged; } @@ -1339,16 +551,16 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi for (size_t i = 0; i < arena->field_count; i++) { size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]); if (purge != 0) { - size_t bitseqx = 0; - while (bitseqx < MI_BITMAP_FIELD_BITS) { + size_t bitidx = 0; + while (bitidx < MI_BITMAP_FIELD_BITS) { // find consecutive range of ones in the purge mask size_t bitlen = 0; - while (bitseqx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitseqx + bitlen))) != 0) { + while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) { bitlen++; } // temporarily claim the purge range as "in-use" to be thread-safe with allocation // try to claim the longest range of corresponding in_use bits - const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitseqx); + const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx); while( bitlen > 0 ) { if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) { break; @@ -1359,15 +571,15 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi if (bitlen > 0) { // read purge again now that we have the in_use bits purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); - if (!mi_arena_purge_range(arena, i, bitseqx, bitlen, purge, stats)) { + if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) { full_purge = false; } any_purged = true; // release the claimed `in_use` bits again 
_mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index); } - bitseqx += (bitlen+1); // +1 to skip the zero (or end) - } // while bitseqx + bitidx += (bitlen+1); // +1 to skip the zero (or end) + } // while bitidx } // purge != 0 } // if not fully purged, make sure to purge again in the future @@ -1530,7 +742,7 @@ bool _mi_arena_contains(const void* p) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_size_of_blocks(arena->block_count) > (const uint8_t*)p) { + if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { return true; } } @@ -1606,8 +818,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int mi_assert_internal(post >= 0); if (post > 0) { // don't use leftover bits at the end - mi_bitmap_index_t postseqx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - _mi_bitmap_claim(arena->blocks_inuse, fields, post, postseqx, NULL); + mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); + _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } return mi_arena_add(arena, arena_id, &_mi_stats_main); @@ -1774,4 +986,3 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv } -#endif \ No newline at end of file diff --git a/src/arena.c b/src/arena.c index 8ca5aaf3..28ad61f1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,7 +21,6 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo #include "mimalloc.h" #include "mimalloc/internal.h" -#include "mimalloc/atomic.h" #include "bitmap.h" @@ -29,38 +28,823 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo Arena allocation ----------------------------------------------------------- */ +#define MI_ARENA_BIN_COUNT (MI_BIN_COUNT) + + // A memory arena descriptor typedef struct mi_arena_s { - mi_arena_id_t id; // arena id; 0 for non-specific mi_memid_t memid; // memid of the memory area - _Atomic(uint8_t*)start; // the start of the memory area + mi_arena_id_t id; // arena id; 0 for non-specific + size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) - size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) - size_t meta_size; // size of the arena structure itself (including its bitmaps) - mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node bool exclusive; // only allow allocations if specifically for this arena bool is_large; // memory area consists of large- or huge OS pages (always committed) mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited - _Atomic(size_t)search_idx; // optimization to start the search for free blocks - _Atomic(mi_msecs_t)purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. - mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) - mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. 
(can be NULL for memory that cannot be (reset) decommitted) - mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) - mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) - // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields. + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. + + mi_bitmap_t blocks_free; // is the block free? + mi_bitmap_t blocks_committed; // is the block committed? (i.e. accessible) + mi_bitmap_t blocks_purge; // can the block be purged? (block in purge => block in free) + mi_bitmap_t blocks_dirty; // is the block potentially non-zero? + mi_bitmap_t blocks_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page) + // the full queue contains abandoned full pages } mi_arena_t; - -#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB -#define MI_MAX_ARENAS (132) // Limited as the reservation exponentially increases (and takes up .bss) +#define MI_MAX_ARENAS (1024) // Limited for now (and takes up .bss) // The available arenas static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 + +/* ----------------------------------------------------------- + Arena id's + id = arena_index + 1 +----------------------------------------------------------- */ + +size_t mi_arena_id_index(mi_arena_id_t id) { + return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); +} + +static mi_arena_id_t mi_arena_id_create(size_t arena_index) { + mi_assert_internal(arena_index < MI_MAX_ARENAS); + return (int)arena_index + 1; +} + +mi_arena_id_t _mi_arena_id_none(void) { + return 0; +} + +static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { + return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || + (arena_id == req_arena_id)); +} + +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { + if (memid.memkind == MI_MEM_ARENA) { + return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); + } + else { + return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id); + } +} + +size_t mi_arena_get_count(void) { + return mi_atomic_load_relaxed(&mi_arena_count); +} + +mi_arena_t* mi_arena_from_index(size_t idx) { + mi_assert_internal(idx < mi_arena_get_count()); + return mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[idx]); +} + + + +/* ----------------------------------------------------------- + Util +----------------------------------------------------------- */ + + +// Size of an arena +static size_t mi_arena_size(mi_arena_t* arena) { + return mi_size_of_blocks(arena->block_count); +} + +static size_t mi_arena_info_blocks(void) { + const size_t os_page_size = _mi_os_page_size(); + const size_t info_size = _mi_align_up(sizeof(mi_arena_t), os_page_size) + os_page_size; // + guard page + const size_t info_blocks = mi_block_count_of_size(info_size); + return info_blocks; +} + + +// Start of the arena memory area +static uint8_t* mi_arena_start(mi_arena_t* arena) { + return ((uint8_t*)arena); +} + +// Start of a block +void* mi_arena_block_start(mi_arena_t* arena, size_t 
block_index) { + return (mi_arena_start(arena) + mi_size_of_blocks(block_index)); +} + +// Arena area +void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { + if (size != NULL) *size = 0; + const size_t arena_index = mi_arena_id_index(arena_id); + if (arena_index >= MI_MAX_ARENAS) return NULL; + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + if (arena == NULL) return NULL; + if (size != NULL) { *size = mi_size_of_blocks(arena->block_count); } + return mi_arena_start(arena); +} + + +// Create an arena memid +static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, size_t block_index) { + mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA); + memid.mem.arena.id = id; + memid.mem.arena.block_index = block_index; + memid.mem.arena.is_exclusive = is_exclusive; + return memid; +} + +// returns if the arena is exclusive +bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, size_t* block_index) { + mi_assert_internal(memid.memkind == MI_MEM_ARENA); + *arena_index = mi_arena_id_index(memid.mem.arena.id); + *block_index = memid.mem.arena.block_index; + return memid.mem.arena.is_exclusive; +} + + + +/* ----------------------------------------------------------- + Arena Allocation +----------------------------------------------------------- */ + +static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool commit, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld) +{ + MI_UNUSED(arena_index); + mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); + + size_t block_index; + if (!mi_bitmap_try_find_and_clearN(&arena->blocks_free, tseq, needed_bcount, &block_index)) return NULL; + + // claimed it! + void* p = mi_arena_block_start(arena, block_index); + *memid = mi_memid_create_arena(arena->id, arena->exclusive, block_index); + memid->is_pinned = arena->memid.is_pinned; + + // set the dirty bits + if (arena->memid.initially_zero) { + memid->initially_zero = mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_dirty, block_index, needed_bcount, NULL); + } + + // set commit state + if (commit) { + // commit requested, but the range may not be committed as a whole: ensure it is committed now + memid->initially_committed = true; + + bool all_already_committed; + mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount, &all_already_committed); + if (!all_already_committed) { + bool commit_zero = false; + if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) { + memid->initially_committed = false; + } + else { + if (commit_zero) { memid->initially_zero = true; } + } + } + } + else { + // no need to commit, but check if already fully committed + memid->initially_committed = mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount); + } + + return p; +} + +// allocate in a speficic arena +static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, + size_t size, size_t alignment, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld) +{ + mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN); + if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL; + + const size_t bcount = mi_block_count_of_size(size); + const size_t arena_index = mi_arena_id_index(arena_id); + mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); + mi_assert_internal(size <= mi_size_of_blocks(bcount)); + + // Check arena 
suitability
+  mi_arena_t* arena = mi_arena_from_index(arena_index);
+  if (arena == NULL) return NULL;
+  if (!allow_large && arena->is_large) return NULL;
+  if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL;
+  if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity
+    const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
+    if (match_numa_node) { if (!numa_suitable) return NULL; }
+    else { if (numa_suitable) return NULL; }
+  }
+
+  // try to allocate
+  void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, tseq, memid, tld);
+  mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment));
+  return p;
+}
+
+
+// allocate from an arena with fallback to the OS
+static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
+                                                 bool commit, bool allow_large,
+                                                 mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld)
+{
+  mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN);
+  if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL;
+
+  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+  if mi_likely(max_arena == 0) return NULL;
+
+  if (req_arena_id != _mi_arena_id_none()) {
+    // try a specific arena if requested
+    if (mi_arena_id_index(req_arena_id) < max_arena) {
+      void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
+      if (p != NULL) return p;
+    }
+  }
+  else {
+    // try numa affine allocation
+    for (size_t i = 0; i < max_arena; i++) {
+      void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
+      if (p != NULL) return p;
+    }
+
+    // try from another numa node instead..
+ if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); + if (p != NULL) return p; + } + } + } + return NULL; +} + +// try to reserve a fresh arena space +static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id) +{ + if (_mi_preloading()) return false; // use OS only while pre loading + if (req_arena_id != _mi_arena_id_none()) return false; + + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); + if (arena_count > (MI_MAX_ARENAS - 4)) return false; + + // calc reserve + size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); + if (arena_reserve == 0) return false; + + if (!_mi_os_has_virtual_reserve()) { + arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example) + } + arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); + + if (arena_count >= 8 && arena_count <= 128) { + // scale up the arena sizes exponentially every 8 entries (128 entries get to 589TiB) + const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16); + size_t reserve = 0; + if (!mi_mul_overflow(multiplier, arena_reserve, &reserve)) { + arena_reserve = reserve; + } + } + + // check arena bounds + const size_t min_reserve = mi_size_of_blocks(mi_arena_info_blocks() + 1); + const size_t max_reserve = MI_BITMAP_MAX_BITS * MI_ARENA_BLOCK_SIZE; + if (arena_reserve < min_reserve) { + arena_reserve = min_reserve; + } + else if (arena_reserve > max_reserve) { + arena_reserve = max_reserve; + } + + if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size + + // commit eagerly? + bool arena_commit = false; + if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } + else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + + return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0); +} + + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + size_t tseq = _mi_thread_seq_id(); + *memid = _mi_memid_none(); + + const int numa_node = _mi_os_numa_node(tld); // current numa node + + // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed? 
+ if (size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && alignment <= MI_ARENA_BLOCK_ALIGN && align_offset == 0) { + void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); + if (p != NULL) return p; + + // otherwise, try to first eagerly reserve a new arena + if (req_arena_id == _mi_arena_id_none()) { + mi_arena_id_t arena_id = 0; + if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { + // and try allocate in there + mi_assert_internal(req_arena_id == _mi_arena_id_none()); + p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld); + if (p != NULL) return p; + } + } + } + } + + // if we cannot use OS allocation, return NULL + if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) { + errno = ENOMEM; + return NULL; + } + + // finally, fall back to the OS + if (align_offset > 0) { + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); + } + else { + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); + } +} + +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) +{ + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld); +} + + +/* ----------------------------------------------------------- + Arena free +----------------------------------------------------------- */ +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats); +static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats); + +void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); + mi_assert_internal(committed_size <= size); + if (p==NULL) return; + if (size==0) return; + const bool all_committed = (committed_size == size); + + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) 
+ mi_track_mem_undefined(p, size); + + if (mi_memkind_is_os(memid.memkind)) { + // was a direct OS allocation, pass through + if (!all_committed && committed_size > 0) { + // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) + _mi_stat_decrease(&_mi_stats_main.committed, committed_size); + } + _mi_os_free(p, size, memid, stats); + } + else if (memid.memkind == MI_MEM_ARENA) { + // allocated in an arena + size_t arena_idx; + size_t block_idx; + mi_arena_memid_indices(memid, &arena_idx, &block_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); + mi_assert_internal(arena != NULL); + const size_t blocks = mi_block_count_of_size(size); + + // checks + if (arena == NULL) { + _mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + mi_assert_internal(block_idx < arena->block_count); + mi_assert_internal(block_idx > mi_arena_info_blocks()); + if (block_idx <= mi_arena_info_blocks() || block_idx > arena->block_count) { + _mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + + // potentially decommit + if (arena->memid.is_pinned || arena->memid.initially_committed) { + mi_assert_internal(all_committed); + } + else { + if (!all_committed) { + // mark the entire range as no longer committed (so we recommit the full range when re-using) + mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_committed, blocks, block_idx, NULL); + mi_track_mem_noaccess(p, size); + if (committed_size > 0) { + // if partially committed, adjust the committed stats (is it will be recommitted when re-using) + // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. + _mi_stat_decrease(&_mi_stats_main.committed, committed_size); + } + // note: if not all committed, it may be that the purge will reset/decommit the entire range + // that contains already decommitted parts. Since purge consistently uses reset or decommit that + // works (as we should never reset decommitted parts). + } + // (delay) purge the entire range + mi_arena_schedule_purge(arena, block_idx, blocks, stats); + } + + // and make it available to others again + bool all_inuse = mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_free, block_idx, blocks, NULL); + if (!all_inuse) { + _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size); + return; + }; + } + else { + // arena was none, external, or static; nothing to do + mi_assert_internal(memid.memkind < MI_MEM_OS); + } + + // purge expired decommits + mi_arenas_try_purge(false, false, stats); +} + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. 
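As context for the `mi_option_destroy_on_exit` path mentioned in the comment above, here is a minimal standalone sketch (not part of this patch) of how a client opts in through the public option API in `mimalloc.h`; that the exit hook ends up in the arena destroy path is an assumption of this sketch, not shown in this hunk.

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // opt in: release all owned arena/OS memory when the process (or dll) unloads
  mi_option_enable(mi_option_destroy_on_exit);

  void* p = mi_malloc(32 * 1024 * 1024);   // a larger block that typically comes from an arena
  printf("allocated at %p\n", p);
  mi_free(p);
  return 0;  // at exit, mimalloc is assumed to destroy the arenas it owns instead of leaving them mapped
}
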
+static void mi_arenas_unsafe_destroy(void) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + size_t new_max_arena = 0; + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL) { + mi_lock_done(&arena->abandoned_visit_lock); + if (mi_memkind_is_os(arena->memid.memkind)) { + mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); + _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid, &_mi_stats_main); + } + } + } + + // try to lower the max arena. + size_t expected = max_arena; + mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); +} + +// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired +void _mi_arenas_collect(bool force_purge, mi_stats_t* stats) { + mi_arenas_try_purge(force_purge, force_purge /* visit all? */, stats); +} + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { + mi_arenas_unsafe_destroy(); + _mi_arenas_collect(true /* force purge */, stats); // purge non-owned arenas +} + +// Is a pointer inside any of our arenas? +bool _mi_arena_contains(const void* p) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_blocks(arena->block_count) > (const uint8_t*)p) { + return true; + } + } + return false; +} + + +/* ----------------------------------------------------------- + Add an arena. +----------------------------------------------------------- */ + +static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { + mi_assert_internal(arena != NULL); + mi_assert_internal(arena->block_count > 0); + if (arena_id != NULL) { *arena_id = -1; } + + size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); + if (i >= MI_MAX_ARENAS) { + mi_atomic_decrement_acq_rel(&mi_arena_count); + return false; + } + _mi_stat_counter_increase(&stats->arena_count,1); + arena->id = mi_arena_id_create(i); + mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); + if (arena_id != NULL) { *arena_id = arena->id; } + return true; +} + +static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept +{ + mi_assert(!is_large || memid.initially_committed && memid.is_pinned); + mi_assert(_mi_is_aligned(start,MI_ARENA_BLOCK_SIZE)); + mi_assert(start!=NULL); + if (start==NULL) return false; + if (!_mi_is_aligned(start,MI_ARENA_BLOCK_SIZE)) { + // todo: use alignment in memid to align to blocksize first? 
+ _mi_warning_message("cannot use OS memory since it is not aligned to %zu KiB (address %p)", MI_ARENA_BLOCK_SIZE/MI_KiB, start); + return false; + } + + if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); } + + const size_t info_blocks = mi_arena_info_blocks(); + const size_t bcount = size / MI_ARENA_BLOCK_SIZE; // divide down + if (bcount < info_blocks+1) { + _mi_warning_message("cannot use OS memory since it is not large enough (size %zu KiB, minimum required is %zu KiB)", size/MI_KiB, mi_size_of_blocks(info_blocks+1)/MI_KiB); + return false; + } + if (bcount > MI_BITMAP_MAX_BITS) { + // todo: allow larger areas (either by splitting it up in arena's or having larger arena's) + _mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_blocks(MI_BITMAP_MAX_BITS)/MI_MiB); + return false; + } + mi_arena_t* arena = (mi_arena_t*)start; + + // commit & zero if needed + bool is_zero = memid.initially_zero; + if (!memid.initially_committed) { + _mi_os_commit(arena, mi_size_of_blocks(info_blocks), &is_zero, &_mi_stats_main); + } + if (!is_zero) { + _mi_memzero(arena, mi_size_of_blocks(info_blocks)); + } + + // init + arena->id = _mi_arena_id_none(); + arena->memid = memid; + arena->exclusive = exclusive; + arena->block_count = bcount; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) + arena->is_large = is_large; + arena->purge_expire = 0; + mi_lock_init(&arena->abandoned_visit_lock); + + // init bitmaps + mi_bitmap_init(&arena->blocks_free,true); + mi_bitmap_init(&arena->blocks_committed,true); + mi_bitmap_init(&arena->blocks_dirty,true); + mi_bitmap_init(&arena->blocks_purge,true); + for( int i = 0; i < MI_ARENA_BIN_COUNT; i++) { + mi_bitmap_init(&arena->blocks_abandoned[i],true); + } + + // reserve our meta info (and reserve blocks outside the memory area) + mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->blocks_free, info_blocks /* start */, arena->block_count - info_blocks); + if (memid.initially_committed) { + mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->blocks_committed, 0, arena->block_count); + } + else { + mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, 0, info_blocks, NULL); + } + mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_dirty, 0, info_blocks, NULL); + + return mi_arena_add(arena, arena_id, &_mi_stats_main); +} + + +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.initially_committed = is_committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return mi_manage_os_memory_ex2(start, size, is_large, numa_node, exclusive, memid, arena_id); +} + +// Reserve a range of regular OS memory +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); + size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block + mi_memid_t memid; + void* start = _mi_os_alloc_aligned(size, MI_ARENA_BLOCK_ALIGN, commit, allow_large, &memid, &_mi_stats_main); + if (start == NULL) return ENOMEM; + const bool is_large = memid.is_pinned; // todo: use separate is_large field? 
+ if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); + _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); + return ENOMEM; + } + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : ""); + return 0; +} + + +// Manage a range of regular OS memory +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { + return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); +} + +// Reserve a range of regular OS memory +int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { + return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); +} + + +/* ----------------------------------------------------------- + Debugging +----------------------------------------------------------- */ +static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf) { + size_t bit_set_count = 0; + for (int bit = 0; bit < MI_BFIELD_BITS; bit++) { + bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0); + if (is_set) bit_set_count++; + buf[bit] = (is_set ? 'x' : '.'); + } + return bit_set_count; +} + +static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_t* bitmap) { + _mi_verbose_message("%s%s:\n", prefix, header); + size_t bit_count = 0; + size_t bit_set_count = 0; + for (int i = 0; i < MI_BFIELD_BITS && bit_count < block_count; i++) { + char buf[MI_BITMAP_CHUNK_BITS + 1]; + mi_bitmap_chunk_t* chunk = &bitmap->chunks[i]; + for (int j = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) { + if (bit_count < block_count) { + bit_set_count += mi_debug_show_bfield(chunk->bfields[j], buf + j*MI_BFIELD_BITS); + } + else { + _mi_memset(buf + j*MI_BFIELD_BITS, ' ', MI_BFIELD_BITS); + } + bit_count += MI_BFIELD_BITS; + } + buf[MI_BITMAP_CHUNK_BITS] = 0; + _mi_verbose_message("%s %s\n", prefix, buf); + } + _mi_verbose_message("%s total ('x'): %zu\n", prefix, bit_set_count); + return bit_set_count; +} + +void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept { + MI_UNUSED(show_abandoned); + size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); + size_t free_total = 0; + size_t block_total = 0; + //size_t abandoned_total = 0; + size_t purge_total = 0; + for (size_t i = 0; i < max_arenas; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; + block_total += arena->block_count; + _mi_verbose_message("arena %zu: %zu blocks%s\n", i, arena->block_count, (arena->memid.is_pinned ? 
", pinned" : "")); + if (show_inuse) { + free_total += mi_debug_show_bitmap(" ", "free blocks", arena->block_count, &arena->blocks_free); + } + mi_debug_show_bitmap(" ", "committed blocks", arena->block_count, &arena->blocks_committed); + // todo: abandoned blocks + if (show_purge) { + purge_total += mi_debug_show_bitmap(" ", "purgeable blocks", arena->block_count, &arena->blocks_purge); + } + } + if (show_inuse) _mi_verbose_message("total inuse blocks : %zu\n", block_total - free_total); + // if (show_abandoned) _mi_verbose_message("total abandoned blocks: %zu\n", abandoned_total); + if (show_purge) _mi_verbose_message("total purgeable blocks: %zu\n", purge_total); +} + + +/* ----------------------------------------------------------- + Reserve a huge page arena. +----------------------------------------------------------- */ +// reserve at a specific numa node +int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = -1; + if (pages==0) return 0; + if (numa_node < -1) numa_node = -1; + if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); + size_t hsize = 0; + size_t pages_reserved = 0; + mi_memid_t memid; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); + if (p==NULL || pages_reserved==0) { + _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); + return ENOMEM; + } + _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); + + if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + _mi_os_free(p, hsize, memid, &_mi_stats_main); + return ENOMEM; + } + return 0; +} + +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { + return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL); +} + +// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { + if (pages == 0) return 0; + + // pages per numa node + size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); + if (numa_count <= 0) numa_count = 1; + const size_t pages_per = pages / numa_count; + const size_t pages_mod = pages % numa_count; + const size_t timeout_per = (timeout_msecs==0 ? 
0 : (timeout_msecs / numa_count) + 50); + + // reserve evenly among numa nodes + for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + size_t node_pages = pages_per; // can be 0 + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); + if (err) return err; + if (pages < node_pages) { + pages = 0; + } + else { + pages -= node_pages; + } + } + + return 0; +} + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + MI_UNUSED(max_secs); + _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + if (pages_reserved != NULL) *pages_reserved = 0; + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); + if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; + return err; +} + + + +/* ----------------------------------------------------------- + Abandoned pages +----------------------------------------------------------- */ + +void mi_arena_page_abandon(mi_page_t* page) { + mi_assert_internal(mi_page_is_abandoned(page)); + if (mi_page_is_full(page)) {} +} + + + +/* ----------------------------------------------------------- + Arena purge +----------------------------------------------------------- */ + +static long mi_arena_purge_delay(void) { + // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay + return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult)); +} + +// reset or decommit in an arena and update the committed/decommit bitmaps +// assumes we own the area (i.e. blocks_free is claimed by us) +static void mi_arena_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(!arena->memid.is_pinned); + const size_t size = mi_size_of_blocks(blocks); + void* const p = mi_arena_block_start(arena, block_idx); + bool needs_recommit; + if (mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_idx, blocks)) { + // all blocks are committed, we can purge freely + needs_recommit = _mi_os_purge(p, size, stats); + } + else { + // some blocks are not committed -- this can happen when a partially committed block is freed + // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), + // and also undo the decommit stats (as it was already adjusted) + mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); + if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); } + } + + // clear the purged blocks + mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_purge, blocks, block_idx, NULL); + + // update committed bitmap + if (needs_recommit) { + mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_committed, blocks, block_idx, NULL); + } +} + + +// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. +// Note: assumes we (still) own the area as we may purge immediately +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats) { + const long delay = mi_arena_purge_delay(); + if (delay < 0) return; // is purging allowed at all? 
+ + if (_mi_preloading() || delay == 0) { + // decommit directly + mi_arena_purge(arena, block_idx, blocks, stats); + } + else { + // schedule decommit + _mi_error_message(EFAULT, "purging not yet implemented\n"); + } +} + + +static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats) { + if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled + + const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); + if (max_arena == 0) return; + + _mi_error_message(EFAULT, "purging not yet implemented\n"); + MI_UNUSED(stats); + MI_UNUSED(visit_all); + MI_UNUSED(force); +} + + +#if 0 + #define MI_IN_ARENA_C #include "arena-abandon.c" #undef MI_IN_ARENA_C @@ -116,12 +900,12 @@ static size_t mi_block_count_of_size(size_t size) { return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } -static size_t mi_arena_block_size(size_t bcount) { +static size_t mi_size_of_blocks(size_t bcount) { return (bcount * MI_ARENA_BLOCK_SIZE); } static size_t mi_arena_size(mi_arena_t* arena) { - return mi_arena_block_size(arena->block_count); + return mi_size_of_blocks(arena->block_count); } static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { @@ -207,7 +991,7 @@ void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) { } void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { - return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex))); + return (arena->start + mi_size_of_blocks(mi_bitmap_index_bit(bindex))); } @@ -216,7 +1000,7 @@ void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { ----------------------------------------------------------- */ // claim the `blocks_inuse` bits -static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) +static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, size_t block_idx, mi_stats_t* stats) { size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx, stats)) { @@ -268,7 +1052,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { bool commit_zero = false; - if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) { + if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) { memid->initially_committed = false; } else { @@ -293,7 +1077,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no const size_t bcount = mi_block_count_of_size(size); const size_t arena_index = mi_arena_id_index(arena_id); mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); - mi_assert_internal(size <= mi_arena_block_size(bcount)); + mi_assert_internal(size <= mi_size_of_blocks(bcount)); // Check arena suitability mi_arena_t* arena = mi_arena_from_index(arena_index); @@ -439,7 +1223,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (arena_index >= MI_MAX_ARENAS) return NULL; mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; - if (size != NULL) { *size = mi_arena_block_size(arena->block_count); } + if (size != NULL) { *size = 
mi_size_of_blocks(arena->block_count); } return arena->start; } @@ -459,7 +1243,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); mi_assert_internal(!arena->memid.is_pinned); - const size_t size = mi_arena_block_size(blocks); + const size_t size = mi_size_of_blocks(blocks); void* const p = mi_arena_block_start(arena, bitmap_idx); bool needs_recommit; if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { @@ -511,25 +1295,25 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t // purge a range of blocks // return true if the full range was purged. // assumes we own the area (i.e. blocks_in_use is claimed by us) -static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) { - const size_t endidx = startidx + bitlen; - size_t bitidx = startidx; +static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startseqx, size_t bitlen, size_t purge, mi_stats_t* stats) { + const size_t endidx = startseqx + bitlen; + size_t bitseqx = startseqx; bool all_purged = false; - while (bitidx < endidx) { + while (bitseqx < endidx) { // count consecutive ones in the purge mask size_t count = 0; - while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) { + while (bitseqx + count < endidx && (purge & ((size_t)1 << (bitseqx + count))) != 0) { count++; } if (count > 0) { // found range to be purged - const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx); + const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitseqx); mi_arena_purge(arena, range_idx, count, stats); if (count == bitlen) { all_purged = true; } } - bitidx += (count+1); // +1 to skip the zero bit (or end) + bitseqx += (count+1); // +1 to skip the zero bit (or end) } return all_purged; } @@ -551,16 +1335,16 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi for (size_t i = 0; i < arena->field_count; i++) { size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]); if (purge != 0) { - size_t bitidx = 0; - while (bitidx < MI_BITMAP_FIELD_BITS) { + size_t bitseqx = 0; + while (bitseqx < MI_BITMAP_FIELD_BITS) { // find consecutive range of ones in the purge mask size_t bitlen = 0; - while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) { + while (bitseqx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitseqx + bitlen))) != 0) { bitlen++; } // temporarily claim the purge range as "in-use" to be thread-safe with allocation // try to claim the longest range of corresponding in_use bits - const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx); + const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitseqx); while( bitlen > 0 ) { if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) { break; @@ -571,15 +1355,15 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi if (bitlen > 0) { // read purge again now that we have the in_use bits purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); - if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) { + if (!mi_arena_purge_range(arena, i, bitseqx, bitlen, purge, stats)) { full_purge = false; } any_purged = true; // release the claimed `in_use` bits again 
_mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index); } - bitidx += (bitlen+1); // +1 to skip the zero (or end) - } // while bitidx + bitseqx += (bitlen+1); // +1 to skip the zero (or end) + } // while bitseqx } // purge != 0 } // if not fully purged, make sure to purge again in the future @@ -742,7 +1526,7 @@ bool _mi_arena_contains(const void* p) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { + if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_size_of_blocks(arena->block_count) > (const uint8_t*)p) { return true; } } @@ -818,8 +1602,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int mi_assert_internal(post >= 0); if (post > 0) { // don't use leftover bits at the end - mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); + mi_bitmap_index_t postseqx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); + _mi_bitmap_claim(arena->blocks_inuse, fields, post, postseqx, NULL); } return mi_arena_add(arena, arena_id, &_mi_stats_main); @@ -986,3 +1770,4 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv } +#endif \ No newline at end of file diff --git a/src/bitmap-old.c b/src/bitmap-old.c new file mode 100644 index 00000000..3e6311dc --- /dev/null +++ b/src/bitmap-old.c @@ -0,0 +1,419 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +Concurrent bitmap that can set/reset sequences of bits atomically, +represented as an array of fields where each field is a machine word (`size_t`) + +There are two api's; the standard one cannot have sequences that cross +between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). + +The `_across` postfixed functions do allow sequences that can cross over +between the fields. (This is used in arena allocation) +---------------------------------------------------------------------------- */ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/bits.h" +#include "bitmap.h" + +/* ----------------------------------------------------------- + Bitmap definition +----------------------------------------------------------- */ + +// The bit mask for a given number of blocks at a specified bit index. 
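As a standalone illustration (compilable on its own, not part of the patch) of the mask arithmetic that the helper below encodes: `count` one-bits starting at `bitidx`, with the full-field case handled separately so the shift never reaches the field width.

#include <assert.h>
#include <stddef.h>

#define FIELD_BITS  (8*sizeof(size_t))
#define FIELD_FULL  (~(size_t)0)

// same shape as mi_bitmap_mask_ below: `count` one-bits starting at `bitidx`
static size_t bitmap_mask(size_t count, size_t bitidx) {
  assert(count > 0 && count + bitidx <= FIELD_BITS);
  if (count >= FIELD_BITS) return FIELD_FULL;        // avoid an undefined full-width shift
  return (((size_t)1 << count) - 1) << bitidx;
}

int main(void) {
  assert(bitmap_mask(3, 4) == 0x70);                 // three bits starting at bit 4
  assert(bitmap_mask(1, 0) == 0x1);
  assert(bitmap_mask(FIELD_BITS, 0) == FIELD_FULL);  // a full field
  return 0;
}
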
+static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) { + mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + mi_assert_internal(count > 0); + if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; + if (count == 0) return 0; + return ((((size_t)1 << count) - 1) << bitidx); +} + + + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. +bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); + mi_bitmap_field_t* field = &bitmap[idx]; + size_t map = mi_atomic_load_relaxed(field); + if (map==MI_BITMAP_FIELD_FULL) return false; // short cut + + // search for 0-bit sequence of length count + const size_t mask = mi_bitmap_mask_(count, 0); + const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; + +#if MI_HAS_FAST_BITSCAN + size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible +#else + size_t bitidx = 0; // otherwise start at 0 +#endif + size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + + // scan linearly for a free range of zero bits + while (bitidx <= bitidx_max) { + const size_t mapm = (map & m); + if (mapm == 0) { // are the mask bits free at bitidx? + mi_assert_internal((m >> bitidx) == mask); // no overflow? + const size_t newmap = (map | m); + mi_assert_internal((newmap^map) >> bitidx == mask); + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here? + // no success, another thread claimed concurrently.. keep going (with updated `map`) + continue; + } + else { + // success, we claimed the bits! + *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + } + } + else { + // on to the next bit range +#if MI_HAS_FAST_BITSCAN + mi_assert_internal(mapm != 0); + const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx)); + mi_assert_internal(shift > 0 && shift <= count); +#else + const size_t shift = 1; +#endif + bitidx += shift; + m <<= shift; + } + } + // no bits found + return false; +} + + +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. +bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) { idx = 0; } // wrap + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + return false; +} + + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. 
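
// Simplified, self-contained sketch of the same single-field claim scheme using
// plain C11 atomics (the code above adds the fast bit-scan and the search over
// candidate positions); bit_mask/try_claim_at are illustrative names only.
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

// mask of `count` bits (0 < count <= word size) starting at `bitidx`
static size_t bit_mask(size_t count, size_t bitidx) {
  if (count >= sizeof(size_t)*8) return ~(size_t)0;
  return (((size_t)1 << count) - 1) << bitidx;
}

// try to claim `count` zero bits at `bitidx` in one atomic field
static bool try_claim_at(_Atomic(size_t)* field, size_t count, size_t bitidx) {
  const size_t mask = bit_mask(count, bitidx);
  size_t map = atomic_load_explicit(field, memory_order_relaxed);
  while ((map & mask) == 0) {                   // bits still free?
    if (atomic_compare_exchange_weak_explicit(field, &map, map | mask,
          memory_order_acq_rel, memory_order_relaxed)) {
      return true;                              // claimed the whole range
    }                                           // else `map` was reloaded; retry
  }
  return false;                                 // (part of) the range is already taken
}
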
+bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == mask); + const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); + return ((prev & mask) == mask); +} + + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. +bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); + size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); + if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); } + return ((prev & mask) == 0); +} + +// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. +static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + const size_t field = mi_atomic_load_relaxed(&bitmap[idx]); + if (any_ones != NULL) { *any_ones = ((field & mask) != 0); } + return ((field & mask) == mask); +} + +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); + do { + if ((expected & mask) != 0) return false; + } + while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); + mi_assert_internal((expected & mask) == 0); + return true; +} + + +bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} + + +//-------------------------------------------------------------------------- +// the `_across` functions work on bitmaps where sequences can cross over +// between the fields. 
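
// Usage sketch for the field-based API above; assumes bitmap-old.h is included
// and these functions are linked in. Claims 5 bits, checks them, releases them.
#include <stdio.h>

#define DEMO_FIELDS 2
static mi_bitmap_field_t demo_fields[DEMO_FIELDS];   // static zero-init: all bits free

static void demo_claim_unclaim(void) {
  mi_bitmap_index_t where;
  if (_mi_bitmap_try_find_from_claim(demo_fields, DEMO_FIELDS, 0, 5, &where)) {
    printf("claimed 5 bits at field %zu, bit %zu\n",
           mi_bitmap_index_field(where), mi_bitmap_index_bit_in_field(where));
    if (_mi_bitmap_is_claimed(demo_fields, DEMO_FIELDS, 5, where)) {
      _mi_bitmap_unclaim(demo_fields, DEMO_FIELDS, 5, where);   // release again
    }
  }
}
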
This is used in arena allocation +//-------------------------------------------------------------------------- + +// Try to atomically claim a sequence of `count` bits starting from the field +// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. +// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`) +static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) +{ + mi_assert_internal(bitmap_idx != NULL); + + // check initial trailing zeros + mi_bitmap_field_t* field = &bitmap[idx]; + size_t map = mi_atomic_load_relaxed(field); + const size_t initial = mi_clz(map); // count of initial zeros starting at idx + mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); + if (initial == 0) return false; + if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) + if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries + + // scan ahead + size_t found = initial; + size_t mask = 0; // mask bits for the final field + while(found < count) { + field++; + map = mi_atomic_load_relaxed(field); + const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); + mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS); + mask = mi_bitmap_mask_(mask_bits, 0); + if ((map & mask) != 0) return false; // some part is already claimed + found += mask_bits; + } + mi_assert_internal(field < &bitmap[bitmap_fields]); + + // we found a range of contiguous zeros up to the final field; mask contains mask in the final field + // now try to claim the range atomically + mi_bitmap_field_t* const final_field = field; + const size_t final_mask = mask; + mi_bitmap_field_t* const initial_field = &bitmap[idx]; + const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial; + const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx); + + // initial field + size_t newmap; + field = initial_field; + map = mi_atomic_load_relaxed(field); + do { + newmap = (map | initial_mask); + if ((map & initial_mask) != 0) { goto rollback; }; + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + + // intermediate fields + while (++field < final_field) { + newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); + map = 0; + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } + } + + // final field + mi_assert_internal(field == final_field); + map = mi_atomic_load_relaxed(field); + do { + newmap = (map | final_mask); + if ((map & final_mask) != 0) { goto rollback; } + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + + // claimed! 
+ mi_stat_counter_increase(stats->arena_crossover_count,1); + *bitmap_idx = mi_bitmap_index_create(idx, initial_idx); + return true; + +rollback: + // roll back intermediate fields + // (we just failed to claim `field` so decrement first) + while (--field > initial_field) { + newmap = 0; + map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); + mi_assert_internal(mi_atomic_load_relaxed(field) == map); + mi_atomic_store_release(field, newmap); + } + if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`) + map = mi_atomic_load_relaxed(field); + do { + mi_assert_internal((map & initial_mask) == initial_mask); + newmap = (map & ~initial_mask); + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + } + mi_stat_counter_increase(stats->arena_rollback_count,1); + // retry? (we make a recursive call instead of goto to be able to use const declarations) + if (retries <= 2) { + return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats); + } + else { + return false; + } +} + + +// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) { + mi_assert_internal(count > 0); + if (count <= 2) { + // we don't bother with crossover fields for small counts + return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + } + + // visit the fields + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) { idx = 0; } // wrap + // first try to claim inside a field + /* + if (count <= MI_BITMAP_FIELD_BITS) { + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + */ + // if that fails, then try to claim across fields + if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) { + return true; + } + } + return false; +} + +// Helper for masks across fields; returns the mid count, post_mask may be 0 +static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { + MI_UNUSED(bitmap_fields); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) { + *pre_mask = mi_bitmap_mask_(count, bitidx); + *mid_mask = 0; + *post_mask = 0; + mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields); + return 0; + } + else { + const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx; + mi_assert_internal(pre_bits < count); + *pre_mask = mi_bitmap_mask_(pre_bits, bitidx); + count -= pre_bits; + const size_t mid_count = (count / MI_BITMAP_FIELD_BITS); + *mid_mask = MI_BITMAP_FIELD_FULL; + count %= MI_BITMAP_FIELD_BITS; + *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0)); + mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields); + return mid_count; + } +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. 
+bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + size_t pre_mask; + size_t mid_mask; + size_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_one = true; + mi_bitmap_field_t* field = &bitmap[idx]; + size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part + if ((prev & pre_mask) != pre_mask) all_one = false; + while(mid_count-- > 0) { + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part + if ((prev & mid_mask) != mid_mask) all_one = false; + } + if (post_mask!=0) { + prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part + if ((prev & post_mask) != post_mask) all_one = false; + } + return all_one; +} + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + size_t pre_mask; + size_t mid_mask; + size_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_zero = true; + bool any_zero = false; + _Atomic(size_t)*field = &bitmap[idx]; + size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); + if ((prev & pre_mask) != 0) all_zero = false; + if ((prev & pre_mask) != pre_mask) any_zero = true; + while (mid_count-- > 0) { + prev = mi_atomic_or_acq_rel(field++, mid_mask); + if ((prev & mid_mask) != 0) all_zero = false; + if ((prev & mid_mask) != mid_mask) any_zero = true; + } + if (post_mask!=0) { + prev = mi_atomic_or_acq_rel(field, post_mask); + if ((prev & post_mask) != 0) all_zero = false; + if ((prev & post_mask) != post_mask) any_zero = true; + } + if (pany_zero != NULL) { *pany_zero = any_zero; } + return all_zero; +} + + +// Returns `true` if all `count` bits were 1. +// `any_ones` is `true` if there was at least one bit set to one. 
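
// To make the pre/mid/post split concrete: with 64-bit fields, a range of 100
// bits starting at bit 40 becomes a 24-bit head, one full middle field, and a
// 12-bit tail. A standalone sketch of the same arithmetic as
// mi_bitmap_mask_across (split_across is an illustrative name only):
#include <stdio.h>
#include <stddef.h>

#define FIELD_BITS 64

static void split_across(size_t bitidx, size_t count) {
  if (bitidx + count <= FIELD_BITS) {
    printf("fits in one field: %zu bit(s) at bit %zu\n", count, bitidx);
    return;
  }
  const size_t pre  = FIELD_BITS - bitidx;   // bits left in the first field
  const size_t rest = count - pre;
  const size_t mid  = rest / FIELD_BITS;     // full middle fields
  const size_t post = rest % FIELD_BITS;     // bits in the last field
  printf("head %zu bit(s), %zu full field(s), tail %zu bit(s)\n", pre, mid, post);
}

int main(void) {
  split_across(40, 100);   // head 24 bit(s), 1 full field(s), tail 12 bit(s)
  return 0;
}
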
+static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + size_t pre_mask; + size_t mid_mask; + size_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_ones = true; + bool any_ones = false; + mi_bitmap_field_t* field = &bitmap[idx]; + size_t prev = mi_atomic_load_relaxed(field++); + if ((prev & pre_mask) != pre_mask) all_ones = false; + if ((prev & pre_mask) != 0) any_ones = true; + while (mid_count-- > 0) { + prev = mi_atomic_load_relaxed(field++); + if ((prev & mid_mask) != mid_mask) all_ones = false; + if ((prev & mid_mask) != 0) any_ones = true; + } + if (post_mask!=0) { + prev = mi_atomic_load_relaxed(field); + if ((prev & post_mask) != post_mask) all_ones = false; + if ((prev & post_mask) != 0) any_ones = true; + } + if (pany_ones != NULL) { *pany_ones = any_ones; } + return all_ones; +} + +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} diff --git a/src/bitmap-old.h b/src/bitmap-old.h new file mode 100644 index 00000000..f8898935 --- /dev/null +++ b/src/bitmap-old.h @@ -0,0 +1,110 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +Concurrent bitmap that can set/reset sequences of bits atomically, +represented as an array of fields where each field is a machine word (`size_t`) + +There are two api's; the standard one cannot have sequences that cross +between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). +(this is used in region allocation) + +The `_across` postfixed functions do allow sequences that can cross over +between the fields. (This is used in arena allocation) +---------------------------------------------------------------------------- */ +#pragma once +#ifndef MI_BITMAP_H +#define MI_BITMAP_H + +/* ----------------------------------------------------------- + Bitmap definition +----------------------------------------------------------- */ + +#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE) +#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set + +// An atomic bitmap of `size_t` fields +typedef _Atomic(size_t) mi_bitmap_field_t; +typedef mi_bitmap_field_t* mi_bitmap_t; + +// A bitmap index is the index of the bit in a bitmap. +typedef size_t mi_bitmap_index_t; + +// Create a bit index. 
+static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) { + mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS); + return (idx*MI_BITMAP_FIELD_BITS) + bitidx; +} +static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { + mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); + return mi_bitmap_index_create_ex(idx,bitidx); +} + +// Get the field index from a bit index. +static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx / MI_BITMAP_FIELD_BITS); +} + +// Get the bit index in a bitmap field +static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx % MI_BITMAP_FIELD_BITS); +} + +// Get the full bit index +static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx; +} + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. +bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx); + +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. +bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. +bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. +bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); + +bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + + +//-------------------------------------------------------------------------- +// the `_across` functions work on bitmaps where sequences can cross over +// between the fields. This is used in arena allocation +//-------------------------------------------------------------------------- + +// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats); + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. 
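
// The index encoding above is simply field*MI_BITMAP_FIELD_BITS + bit; a tiny
// standalone check of the round-trip (assuming 64-bit fields):
#include <assert.h>
#include <stddef.h>

#define FIELD_BITS 64

int main(void) {
  const size_t bitmap_idx = 3*FIELD_BITS + 10;   // like mi_bitmap_index_create(3,10)
  assert(bitmap_idx / FIELD_BITS == 3);          // field index (mi_bitmap_index_field)
  assert(bitmap_idx % FIELD_BITS == 10);         // bit within field (mi_bitmap_index_bit_in_field)
  return 0;
}
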
+bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); + +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +#endif diff --git a/src/bitmap.c b/src/bitmap.c index 3e6311dc..463d74c7 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,19 +1,12 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2023 Microsoft Research, Daan Leijen +Copyright (c) 2019-2024 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- -Concurrent bitmap that can set/reset sequences of bits atomically, -represented as an array of fields where each field is a machine word (`size_t`) - -There are two api's; the standard one cannot have sequences that cross -between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). - -The `_across` postfixed functions do allow sequences that can cross over -between the fields. (This is used in arena allocation) +Concurrent bitmap that can set/reset sequences of bits atomically ---------------------------------------------------------------------------- */ #include "mimalloc.h" @@ -21,399 +14,586 @@ between the fields. (This is used in arena allocation) #include "mimalloc/bits.h" #include "bitmap.h" -/* ----------------------------------------------------------- - Bitmap definition ------------------------------------------------------------ */ +/* -------------------------------------------------------------------------------- + bfields +-------------------------------------------------------------------------------- */ -// The bit mask for a given number of blocks at a specified bit index. -static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) { - mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); - mi_assert_internal(count > 0); - if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; - if (count == 0) return 0; - return ((((size_t)1 << count) - 1) << bitidx); +static inline size_t mi_bfield_ctz(mi_bfield_t x) { + return mi_ctz(x); +} + +static inline size_t mi_bfield_clz(mi_bfield_t x) { + return mi_clz(x); +} + +// find the least significant bit that is set (i.e. count trailing zero's) +// return false if `x==0` (with `*idx` undefined) and true otherwise, +// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). +static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) { + return mi_bsf(x,idx); +} + +static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) { + return mi_rotr(x,r); +} + +// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0). 
+static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) { + mi_assert_internal(idx < MI_BFIELD_BITS); + const mi_bfield_t mask = ((mi_bfield_t)1)<> bitidx) == mask); // no overflow? - const size_t newmap = (map | m); - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here? - // no success, another thread claimed concurrently.. keep going (with updated `map`) - continue; +static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) { + mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx); +} + +static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) { + mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS); + const size_t i = byte_idx / MI_BFIELD_SIZE; + const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE; + return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx); +} + +// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0) +static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) { + mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(n>0); + bool all_transition = true; + bool all_already_xset = true; + size_t idx = cidx % MI_BFIELD_BITS; + size_t field = cidx / MI_BFIELD_BITS; + while (n > 0) { + size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field + if (m > n) { m = n; } + mi_assert_internal(idx + m <= MI_BFIELD_BITS); + mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); + const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask, &already_xset); + all_already_xset = all_already_xset && already_xset; + // next field + field++; + idx = 0; + n -= m; + } + *palready_xset = all_already_xset; + return all_transition; +} + +// Check if a sequence of `n` bits within a chunk are all set/cleared. +static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { + mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(n>0); + bool all_xset = true; + size_t idx = cidx % MI_BFIELD_BITS; + size_t field = cidx / MI_BFIELD_BITS; + while (n > 0) { + size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field + if (m > n) { m = n; } + mi_assert_internal(idx + m <= MI_BFIELD_BITS); + mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); + const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask); + // next field + field++; + idx = 0; + n -= m; + } + return all_xset; +} + +// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0), +// and false otherwise leaving all bit fields as is. 
+static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { + mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(n>0); + if (n==0) return true; + size_t start_idx = cidx % MI_BFIELD_BITS; + size_t start_field = cidx / MI_BFIELD_BITS; + size_t end_field = MI_BITMAP_CHUNK_FIELDS; + size_t mask_mid = 0; + size_t mask_end = 0; + + // first field + size_t field = start_field; + size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field + if (m > n) { m = n; } + mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); + mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS); + const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask_start)) return false; + + // done? + n -= m; + if (n==0) return true; + + // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields + + // mid fields + while (n >= MI_BFIELD_BITS) { + field++; + mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); + mask_mid = ~MI_ZU(0); + if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore; + n -= MI_BFIELD_BITS; + } + + // last field + if (n > 0) { + mi_assert_internal(n < MI_BFIELD_BITS); + field++; + mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); + end_field = field; + mask_end = (MI_ZU(1)<bfields[field], mask_end)) goto restore; + } + + return true; + +restore: + // field is on the field that failed to set atomically; we need to restore all previous fields + mi_assert_internal(field > start_field); + while( field > start_field) { + field--; + const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid)); + bool already_xset; + mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset); + } + return false; +} + + +// find least 1-bit in a chunk and try unset it atomically +// set `*pidx` to thi bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. +// todo: try neon version +static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) { + #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) + while(true) { + const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); + if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ? + const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? 
-1 : 0) + const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0) + mi_assert_internal(mask != 0); + const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24 + mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS); + size_t cidx; + if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set + if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically + *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx; + mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); + return true; + } + } + // try again + } + #else + size_t idx; + for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { + size_t idx; + if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit + if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically + *pidx = (i*MI_BFIELD_BITS + idx); + mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); + return true; + } + } + } + return false; + #endif +} + + +// find least byte in a chunk with all bits set, and try unset it atomically +// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. +// todo: try neon version +static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) { + #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) + while(true) { + const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); + const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0) + const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte + if (mask == 0) return false; + const size_t i = _tzcnt_u32(mask); + mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS); + const size_t chunk_idx = i / MI_BFIELD_SIZE; + const size_t byte_idx = i % MI_BFIELD_SIZE; + if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically + *pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8); + mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); + return true; + } + // try again + } + #else + size_t idx; + for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { + const mi_bfield_t x = chunk->bfields[i]; + // has_set8 has low bit in each byte set if the byte in x == 0xFF + const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F + (x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80 + >> 7; // shift high bit to low bit + size_t idx; + if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit + mi_assert_internal(idx <= (MI_BFIELD_BITS - 8)); + mi_assert_internal((idx%8)==0); + const size_t byte_idx = idx/8; + if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically + *pidx = (i*MI_BFIELD_BITS) + idx; + mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS); + return true; + } + // else continue + } + } + return false; + #endif +} + + +// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically +// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success. +// todo: try avx2 and neon version +// todo: allow spanning across bfield boundaries? +static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) { + if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger? 
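
// The has_set8 expression above is a SWAR trick: a byte of x equals 0xFF exactly
// when the corresponding byte of ~x is zero, and the classic zero-byte test
// (v - 0x01..01) & ~v & 0x80..80 is applied with v = ~x. Standalone sketch
// (illustrative only; higher flag bits can be false positives from borrow
// propagation, which is harmless when only the least significant hit is used):
#include <stdio.h>
#include <stdint.h>

#define LO_BIT8 ((~(uint64_t)0) / 0xFF)   // 0x0101...01
#define HI_BIT8 (LO_BIT8 << 7)            // 0x8080...80

static uint64_t bytes_all_set(uint64_t x) {
  return ((~x - LO_BIT8) & (x & HI_BIT8)) >> 7;
}

int main(void) {
  const uint64_t x = 0x00FF12FF00000000ull;
  printf("%016llx\n", (unsigned long long)bytes_all_set(x));  // 0001000100000000: bytes 4 and 6 are 0xFF
  return 0;
}
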
+ const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1); + for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { + mi_bfield_t b = chunk->bfields[i]; + size_t bshift = 0; + size_t idx; + while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit + b >>= idx; + bshift += idx; + if (bshift + n >= MI_BFIELD_BITS) break; + + if ((b&mask) == mask) { // found a match + mi_assert_internal( ((mask << bshift) >> bshift) == mask ); + if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<bfields[i] >> bshift); + } } else { - // success, we claimed the bits! - *bitmap_idx = mi_bitmap_index_create(idx, bitidx); - return true; + // advance + const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask) + mi_assert_internal(ones>0); + bshift += ones; + b >>= ones; } } + } + return false; +} + + +// are all bits in a bitmap chunk set? +static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) { + #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) + const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); + return _mm256_test_all_ones(vec); + #else + // written like this for vectorization + mi_bfield_t x = chunk->bfields[0]; + for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) { + x = x & chunk->bfields[i]; + } + return (~x == 0); + #endif +} + +// are all bits in a bitmap chunk clear? +static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) { + #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) + const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); + return _mm256_testz_si256( vec, vec ); + #else + // written like this for vectorization + mi_bfield_t x = chunk->bfields[0]; + for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) { + x = x | chunk->bfields[i]; + } + return (x == 0); + #endif +} + +/* -------------------------------------------------------------------------------- + bitmap +-------------------------------------------------------------------------------- */ +// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true +void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) { + if (!already_zero) { + _mi_memzero_aligned(bitmap, sizeof(*bitmap)); + } +} + +// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread. +void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS); + + // first chunk + size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; + const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; + size_t m = MI_BITMAP_CHUNK_BITS - cidx; + if (m > n) { m = n; } + bool already_xset; + mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset); + + // n can be large so use memset for efficiency for all in-between chunks + chunk_idx++; + n -= m; + const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS; + if (mid_chunks > 0) { + _mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), MI_BITMAP_CHUNK_BITS/8); + chunk_idx += mid_chunks; + n -= mid_chunks * MI_BITMAP_CHUNK_BITS; + } + + // last chunk + if (n > 0) { + mi_assert_internal(n < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS); + mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset); + } +} + + +// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0), +// and false otherwise leaving the bitmask as is. 
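
// The scan in mi_bitmap_chunk_find_and_try_clearN above, reduced to a portable
// standalone sketch: find n consecutive 1-bits inside one 64-bit word by
// repeatedly skipping zeros and too-short runs of ones (ctz64/find_ones_run
// are illustrative names; n is limited to 1..63 here to keep the mask simple).
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static unsigned ctz64(uint64_t x) {       // portable count-trailing-zeros, x != 0
  unsigned n = 0;
  while ((x & 1) == 0) { x >>= 1; n++; }
  return n;
}

static bool find_ones_run(uint64_t b, unsigned n, unsigned* idx) {
  const uint64_t mask = ((uint64_t)1 << n) - 1;
  unsigned shift = 0;
  while (b != 0) {
    const unsigned zeros = ctz64(b);      // skip zeros up to the next 1-bit
    b >>= zeros; shift += zeros;
    if (shift + n > 64) return false;     // not enough room left in the word
    if ((b & mask) == mask) { *idx = shift; return true; }
    const unsigned ones = ctz64(~b);      // the run of ones was too short: skip it
    b >>= ones; shift += ones;
  }
  return false;
}

int main(void) {
  unsigned idx;
  if (find_ones_run(0x0000FF00F0000000ull, 6, &idx)) {
    printf("found 6 consecutive set bits at bit %u\n", idx);   // bit 40
  }
  return 0;
}
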
+bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { + mi_assert_internal(idx < MI_BITMAP_MAX_BITS); + const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; + const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; + return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx); +} + +// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0) +// and false otherwise leaving the bitmask as is. +bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { + mi_assert_internal(idx < MI_BITMAP_MAX_BITS); + mi_assert_internal(idx%8 == 0); + const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; + const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8; + return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx); +} + +// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's) +// and false otherwise leaving the bitmask as is. +// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! +bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); + if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); } + if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); } + + mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS); + const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; + const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; + mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) + if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia + return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n); +} + +// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). +// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! +bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) { + mi_assert_internal(n>0); + mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); + bool local_already_xset; + if (already_xset==NULL) { already_xset = &local_already_xset; } + // if (n==1) { return mi_bitmap_xset(set, bitmap, idx); } + // if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); } + + mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS); + const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; + const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; + mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) + if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia + return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset); +} + +// Is a sequence of n bits already all set/cleared? 
+bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); + mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS); + const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; + const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; + mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) + if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia + return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n); +} + + +#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \ + { size_t _set_idx; \ + size_t _start = start % MI_BFIELD_BITS; \ + mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \ + while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \ + decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS; + +#define mi_bitmap_forall_set_chunks_end() \ + _start += _set_idx+1; /* so chunk_idx stays valid */ \ + _any_set >>= _set_idx; /* skip scanned bits (and avoid UB with (idx+1)) */ \ + _any_set >>= 1; \ + } \ + } + +// Find a set bit in a bitmap and atomically unset it. Returns true on success, +// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`. +// The low `MI_BFIELD_BITS` of start are used to set the start point of the search +// (to reduce thread contention). +bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) { + mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) + { + size_t cidx; + if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) { + *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx; + mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS); + return true; + } else { - // on to the next bit range -#if MI_HAS_FAST_BITSCAN - mi_assert_internal(mapm != 0); - const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx)); - mi_assert_internal(shift > 0 && shift <= count); -#else - const size_t shift = 1; -#endif - bitidx += shift; - m <<= shift; - } - } - // no bits found - return false; -} - - -// Starts at idx, and wraps around to search in all `bitmap_fields` fields. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. -bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { - size_t idx = start_field_idx; - for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) { idx = 0; } // wrap - if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { - return true; - } - } - return false; -} - - -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. -bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const size_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == mask); - const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); - return ((prev & mask) == mask); -} - - -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
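
// The lookup above is hierarchical: the any_set bfield records which chunks may
// still contain set bits, so a search touches one summary word plus (typically)
// one chunk. A simplified, non-atomic sketch of that two-level idea (sizes and
// names are illustrative, not the exact layout of this patch; __builtin_ctzll
// assumes GCC/Clang):
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#define CHUNKS      64      // one summary bit per chunk
#define CHUNK_WORDS 4       // 4 x 64 = 256 bits per chunk

typedef struct two_level_bitmap_s {
  uint64_t chunks[CHUNKS][CHUNK_WORDS];
  uint64_t any_set;         // bit i set => chunk i may contain set bits
} two_level_bitmap_t;

static bool find_and_clear(two_level_bitmap_t* bm, size_t* pidx) {
  uint64_t any = bm->any_set;
  while (any != 0) {
    const size_t ci = (size_t)__builtin_ctzll(any);        // candidate chunk
    for (size_t w = 0; w < CHUNK_WORDS; w++) {
      uint64_t* word = &bm->chunks[ci][w];
      if (*word != 0) {
        const size_t bit = (size_t)__builtin_ctzll(*word);
        *word &= ~((uint64_t)1 << bit);                     // clear the found bit
        *pidx = (ci*CHUNK_WORDS + w)*64 + bit;
        return true;
      }
    }
    bm->any_set &= ~((uint64_t)1 << ci);   // chunk turned out empty: drop its summary bit
    any         &= ~((uint64_t)1 << ci);
  }
  return false;
}
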
-bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const size_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); - size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); - if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); } - return ((prev & mask) == 0); -} - -// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. -static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const size_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - const size_t field = mi_atomic_load_relaxed(&bitmap[idx]); - if (any_ones != NULL) { *any_ones = ((field & mask) != 0); } - return ((field & mask) == mask); -} - -// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. -// Returns `true` if successful when all previous `count` bits were 0. -bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const size_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); - do { - if ((expected & mask) != 0) return false; - } - while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); - mi_assert_internal((expected & mask) == 0); - return true; -} - - -bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); -} - -bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - bool any_ones; - mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); - return any_ones; -} - - -//-------------------------------------------------------------------------- -// the `_across` functions work on bitmaps where sequences can cross over -// between the fields. This is used in arena allocation -//-------------------------------------------------------------------------- - -// Try to atomically claim a sequence of `count` bits starting from the field -// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. 
-// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`) -static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) -{ - mi_assert_internal(bitmap_idx != NULL); - - // check initial trailing zeros - mi_bitmap_field_t* field = &bitmap[idx]; - size_t map = mi_atomic_load_relaxed(field); - const size_t initial = mi_clz(map); // count of initial zeros starting at idx - mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); - if (initial == 0) return false; - if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) - if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries - - // scan ahead - size_t found = initial; - size_t mask = 0; // mask bits for the final field - while(found < count) { - field++; - map = mi_atomic_load_relaxed(field); - const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); - mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS); - mask = mi_bitmap_mask_(mask_bits, 0); - if ((map & mask) != 0) return false; // some part is already claimed - found += mask_bits; - } - mi_assert_internal(field < &bitmap[bitmap_fields]); - - // we found a range of contiguous zeros up to the final field; mask contains mask in the final field - // now try to claim the range atomically - mi_bitmap_field_t* const final_field = field; - const size_t final_mask = mask; - mi_bitmap_field_t* const initial_field = &bitmap[idx]; - const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial; - const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx); - - // initial field - size_t newmap; - field = initial_field; - map = mi_atomic_load_relaxed(field); - do { - newmap = (map | initial_mask); - if ((map & initial_mask) != 0) { goto rollback; }; - } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - - // intermediate fields - while (++field < final_field) { - newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); - map = 0; - if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } - } - - // final field - mi_assert_internal(field == final_field); - map = mi_atomic_load_relaxed(field); - do { - newmap = (map | final_mask); - if ((map & final_mask) != 0) { goto rollback; } - } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - - // claimed! - mi_stat_counter_increase(stats->arena_crossover_count,1); - *bitmap_idx = mi_bitmap_index_create(idx, initial_idx); - return true; - -rollback: - // roll back intermediate fields - // (we just failed to claim `field` so decrement first) - while (--field > initial_field) { - newmap = 0; - map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); - mi_assert_internal(mi_atomic_load_relaxed(field) == map); - mi_atomic_store_release(field, newmap); - } - if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`) - map = mi_atomic_load_relaxed(field); - do { - mi_assert_internal((map & initial_mask) == initial_mask); - newmap = (map & ~initial_mask); - } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - } - mi_stat_counter_increase(stats->arena_rollback_count,1); - // retry? 
(we make a recursive call instead of goto to be able to use const declarations) - if (retries <= 2) { - return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats); - } - else { - return false; - } -} - - -// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. -// Starts at idx, and wraps around to search in all `bitmap_fields` fields. -bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) { - mi_assert_internal(count > 0); - if (count <= 2) { - // we don't bother with crossover fields for small counts - return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); - } - - // visit the fields - size_t idx = start_field_idx; - for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) { idx = 0; } // wrap - // first try to claim inside a field - /* - if (count <= MI_BITMAP_FIELD_BITS) { - if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { - return true; + // we may find that all are unset only on a second iteration but that is ok as + // _any_set is a conservative approximation. + if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { + mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx); } } - */ - // if that fails, then try to claim across fields - if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) { - return true; - } } + mi_bitmap_forall_set_chunks_end(); return false; } -// Helper for masks across fields; returns the mid count, post_mask may be 0 -static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { - MI_UNUSED(bitmap_fields); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) { - *pre_mask = mi_bitmap_mask_(count, bitidx); - *mid_mask = 0; - *post_mask = 0; - mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields); - return 0; - } - else { - const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx; - mi_assert_internal(pre_bits < count); - *pre_mask = mi_bitmap_mask_(pre_bits, bitidx); - count -= pre_bits; - const size_t mid_count = (count / MI_BITMAP_FIELD_BITS); - *mid_mask = MI_BITMAP_FIELD_FULL; - count %= MI_BITMAP_FIELD_BITS; - *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0)); - mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields); - return mid_count; + +// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero. +// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`. 
+bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) { + mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) + { + size_t cidx; + if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) { + *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx; + mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8); + mi_assert_internal((*pidx % 8) == 0); + return true; + } + else { + if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { + mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx); + } + } } + mi_bitmap_forall_set_chunks_end(); + return false; } -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. -bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - size_t idx = mi_bitmap_index_field(bitmap_idx); - size_t pre_mask; - size_t mid_mask; - size_t post_mask; - size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); - bool all_one = true; - mi_bitmap_field_t* field = &bitmap[idx]; - size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part - if ((prev & pre_mask) != pre_mask) all_one = false; - while(mid_count-- > 0) { - prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part - if ((prev & mid_mask) != mid_mask) all_one = false; +// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all. +// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. +bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) { + // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger + // TODO: allow spanning across chunk boundaries + if (n == 0 || n > MI_BFIELD_BITS) return false; + mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) + { + size_t cidx; + if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) { + *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx; + mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n); + return true; + } + else { + if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { + mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx); + } + } } - if (post_mask!=0) { - prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part - if ((prev & post_mask) != post_mask) all_one = false; - } - return all_one; -} - -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { - size_t idx = mi_bitmap_index_field(bitmap_idx); - size_t pre_mask; - size_t mid_mask; - size_t post_mask; - size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); - bool all_zero = true; - bool any_zero = false; - _Atomic(size_t)*field = &bitmap[idx]; - size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); - if ((prev & pre_mask) != 0) all_zero = false; - if ((prev & pre_mask) != pre_mask) any_zero = true; - while (mid_count-- > 0) { - prev = mi_atomic_or_acq_rel(field++, mid_mask); - if ((prev & mid_mask) != 0) all_zero = false; - if ((prev & mid_mask) != mid_mask) any_zero = true; - } - if (post_mask!=0) { - prev = mi_atomic_or_acq_rel(field, post_mask); - if ((prev & post_mask) != 0) all_zero = false; - if ((prev & post_mask) != post_mask) any_zero = true; - } - if (pany_zero != NULL) { *pany_zero = any_zero; } - return all_zero; -} - - -// Returns `true` if all `count` bits were 1. -// `any_ones` is `true` if there was at least one bit set to one. -static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { - size_t idx = mi_bitmap_index_field(bitmap_idx); - size_t pre_mask; - size_t mid_mask; - size_t post_mask; - size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); - bool all_ones = true; - bool any_ones = false; - mi_bitmap_field_t* field = &bitmap[idx]; - size_t prev = mi_atomic_load_relaxed(field++); - if ((prev & pre_mask) != pre_mask) all_ones = false; - if ((prev & pre_mask) != 0) any_ones = true; - while (mid_count-- > 0) { - prev = mi_atomic_load_relaxed(field++); - if ((prev & mid_mask) != mid_mask) all_ones = false; - if ((prev & mid_mask) != 0) any_ones = true; - } - if (post_mask!=0) { - prev = mi_atomic_load_relaxed(field); - if ((prev & post_mask) != post_mask) all_ones = false; - if ((prev & post_mask) != 0) any_ones = true; - } - if (pany_ones != NULL) { *pany_ones = any_ones; } - return all_ones; -} - -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); -} - -bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - bool any_ones; - mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); - return any_ones; + mi_bitmap_forall_set_chunks_end(); + return false; } diff --git a/src/bitmap.h b/src/bitmap.h index f8898935..198a2902 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -6,105 +6,87 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- -Concurrent bitmap that can set/reset sequences of bits atomically, -represented as an array of fields where each field is a machine word (`size_t`) - -There are two api's; the standard one cannot have sequences that cross -between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). -(this is used in region allocation) - -The `_across` postfixed functions do allow sequences that can cross over -between the fields. 
(This is used in arena allocation) +Concurrent bitmap that can set/reset sequences of bits atomically ---------------------------------------------------------------------------- */ #pragma once #ifndef MI_BITMAP_H #define MI_BITMAP_H -/* ----------------------------------------------------------- - Bitmap definition ------------------------------------------------------------ */ +/* -------------------------------------------------------------------------------- + Definitions +-------------------------------------------------------------------------------- */ -#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE) -#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set +typedef size_t mi_bfield_t; -// An atomic bitmap of `size_t` fields -typedef _Atomic(size_t) mi_bitmap_field_t; -typedef mi_bitmap_field_t* mi_bitmap_t; +#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3) +#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT) +#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8) +#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1) +#define MI_BFIELD_LO_BIT8 ((~(mi_bfield_t(0)))/0xFF) // 0x01010101 .. +#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 .. -// A bitmap index is the index of the bit in a bitmap. -typedef size_t mi_bitmap_index_t; +#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS) +#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1) -// Create a bit index. -static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) { - mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS); - return (idx*MI_BITMAP_FIELD_BITS) + bitidx; -} -static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { - mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); - return mi_bitmap_index_create_ex(idx,bitidx); -} - -// Get the field index from a bit index. -static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { - return (bitmap_idx / MI_BITMAP_FIELD_BITS); -} - -// Get the bit index in a bitmap field -static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { - return (bitmap_idx % MI_BITMAP_FIELD_BITS); -} - -// Get the full bit index -static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { - return bitmap_idx; -} - -/* ----------------------------------------------------------- - Claim a bit sequence atomically ------------------------------------------------------------ */ - -// Try to atomically claim a sequence of `count` bits in a single -// field at `idx` in `bitmap`. Returns `true` on success. -bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx); - -// Starts at idx, and wraps around to search in all `bitmap_fields` fields. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. -bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); - -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. -bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); - -// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. -// Returns `true` if successful when all previous `count` bits were 0. 
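The new bitmap is a fixed two-level structure: an array of `MI_BFIELD_BITS` chunks (64 on 64-bit), each chunk holding `MI_BITMAP_CHUNK_FIELDS` atomic `size_t` bfields, plus one `any_set` summary word with a single bit per chunk. With the 256-bit chunks this implies, the capacity is 64 * 256 = 16K bits on 64-bit (8K on 32-bit), matching the `MI_BITMAP_MAX_BITS` comment. An illustrative helper, not part of the patch, showing the index arithmetic the chunk functions use:

  // decompose a bit index 0 <= idx < MI_BITMAP_MAX_BITS into (chunk, bfield, bit)
  static inline void mi_bit_pos(size_t idx, size_t* chunk_idx, size_t* field_idx, size_t* bit_idx) {
    *chunk_idx = idx / MI_BITMAP_CHUNK_BITS;      // which chunk; also the bit index in `any_set`
    const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
    *field_idx = cidx / MI_BFIELD_BITS;           // which bfield inside the chunk
    *bit_idx   = cidx % MI_BFIELD_BITS;           // which bit inside that bfield
  }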
-bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); - -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. -bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); - -bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); -bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +typedef mi_decl_align(32) struct mi_bitmap_chunk_s { + _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS]; +} mi_bitmap_chunk_t; -//-------------------------------------------------------------------------- -// the `_across` functions work on bitmaps where sequences can cross over -// between the fields. This is used in arena allocation -//-------------------------------------------------------------------------- +typedef mi_decl_align(32) struct mi_bitmap_s { + mi_bitmap_chunk_t chunks[MI_BFIELD_BITS]; + _Atomic(mi_bfield_t)any_set; +} mi_bitmap_t; -// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. -// Starts at idx, and wraps around to search in all `bitmap_fields` fields. -bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats); +#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. -bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +/* -------------------------------------------------------------------------------- + Bitmap +-------------------------------------------------------------------------------- */ -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. -bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); +typedef bool mi_bit_t; +#define MI_BIT_SET (true) +#define MI_BIT_CLEAR (false) -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); -bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true +void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero); -#endif +// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread. +void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n); + +// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's). +// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! +// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared. +bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset); + +// Is a sequence of n bits already all set/cleared? 
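For call sites being ported: the separate claim/unclaim entry points of the old interface collapse into the single "xset" form parameterized by MI_BIT_SET/MI_BIT_CLEAR. A rough mapping (sketch only; in the new API `n` may not cross a chunk boundary, and the bitmap is a struct passed by pointer rather than an array of fields):

  //   old: _mi_bitmap_claim(bitmap, bitmap_fields, n, idx, &any_zero);
  //   new: mi_bitmap_xsetN(MI_BIT_SET,   &bm, idx, n, &already_set);  // out-param sense differs: "all bits were already set"
  //   old: _mi_bitmap_unclaim(bitmap, bitmap_fields, n, idx);
  //   new: mi_bitmap_xsetN(MI_BIT_CLEAR, &bm, idx, n, NULL);
  //   old: _mi_bitmap_is_claimed(bitmap, bitmap_fields, n, idx);
  //   new: mi_bitmap_is_xsetN(MI_BIT_SET, &bm, idx, n);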
+bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n); + +// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0) +// and false otherwise leaving the bitmask as is. +mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx); + +// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0) +// and false otherwise leaving the bitmask as is. +mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx); + +// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's) +// and false otherwise leaving the bitmask as is. +// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! +mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n); + +// Find a set bit in a bitmap and atomically unset it. Returns true on success, +// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`. +// The low `MI_BFIELD_BITS` of start are used to set the start point of the search +// (to reduce thread contention). +mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start); + +// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero. +// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`. +mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ); + +// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all. +// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. +mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ); + +#endif // MI_XBITMAP_H diff --git a/src/free.c b/src/free.c index f2e30b65..e1cc9276 100644 --- a/src/free.c +++ b/src/free.c @@ -24,7 +24,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block); // ------------------------------------------------------ // forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) -static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block); +static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_block_t* block); // regular free of a (thread local) block pointer // fast path written carefully to prevent spilling on the stack @@ -57,7 +57,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); - size_t diff = (uint8_t*)p - page->page_start; + size_t diff = (uint8_t*)p - mi_page_start(page); size_t adjust; if mi_likely(page->block_size_shift != 0) { adjust = diff & (((size_t)1 << page->block_size_shift) - 1); @@ -82,72 +82,55 @@ static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, vo #endif // free a local pointer (page parameter comes first for better codegen) -static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept { - MI_UNUSED(segment); +static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_attr_noexcept { mi_block_t* const block = (mi_page_has_aligned(page) ? 
_mi_page_ptr_unalign(page, p) : (mi_block_t*)p); mi_block_check_unguard(page, block, p); mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */); } // free a pointer owned by another thread (page parameter comes first for better codegen) -static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept { +static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept { mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865) mi_block_check_unguard(page, block, p); - mi_free_block_mt(page, segment, block); + mi_free_block_mt(page, block); } // generic free (for runtime integration) -void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { - if (is_local) mi_free_generic_local(page,segment,p); - else mi_free_generic_mt(page,segment,p); +void mi_decl_noinline _mi_free_generic(mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { + if (is_local) mi_free_generic_local(page,p); + else mi_free_generic_mt(page,p); } // Get the segment data belonging to a pointer // This is just a single `and` in release mode but does further checks in debug mode // (and secure mode) to see if this was a valid pointer. -static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) +static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg) { - MI_UNUSED(msg); - - #if (MI_DEBUG>0) + MI_UNUSED_RELEASE(msg); + #if MI_DEBUG if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) { _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); return NULL; } #endif - - mi_segment_t* const segment = _mi_ptr_segment(p); - if mi_unlikely(segment==NULL) return segment; - - #if (MI_DEBUG>0) - if mi_unlikely(!mi_is_in_heap_region(p)) { - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); - } + mi_page_t* const page = _mi_ptr_page(p); + #if MI_DEBUG + if (page == MI_PAGE_PTR_INVALID) { + _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p); } #endif - #if (MI_DEBUG>0 || MI_SECURE>=4) - if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) { - _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); - return NULL; - } - #endif - - return segment; + return page; } // Free a block // Fast path written carefully to prevent register spilling on the stack void mi_free(void* p) mi_attr_noexcept { - mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - if mi_unlikely(segment==NULL) return; - - const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); - mi_page_t* const page = _mi_segment_page_of(segment, p); + mi_page_t* const page = mi_checked_ptr_page(p,"mi_free"); + if mi_unlikely(page==NULL) return; + + const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page)); if mi_likely(is_local) { // thread-local free? 
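As a concrete check of the `_mi_page_ptr_unalign` fast path above (illustration only, not part of the patch): for a page whose block size is a power of two, the interior-pointer adjustment is a single mask instead of a division.

  static void mi_unalign_example(void) {
    const size_t block_size_shift = 6;      // e.g. 64-byte blocks
    const size_t diff   = 200;              // offset of p from mi_page_start(page)
    const size_t adjust = diff & (((size_t)1 << block_size_shift) - 1);
    // adjust == 8, so the returned block is p - 8, i.e. the block starting at offset 192
    (void)adjust;
  }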
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) // thread-local, aligned, and not a full page @@ -156,12 +139,12 @@ void mi_free(void* p) mi_attr_noexcept } else { // page is full or contains (inner) aligned blocks; use generic path - mi_free_generic_local(page, segment, p); + mi_free_generic_local(page, p); } } else { // not thread-local; use generic path - mi_free_generic_mt(page, segment, p); + mi_free_generic_mt(page, p); } } @@ -169,10 +152,8 @@ void mi_free(void* p) mi_attr_noexcept bool _mi_free_delayed_block(mi_block_t* block) { // get segment and page mi_assert_internal(block!=NULL); - const mi_segment_t* const segment = _mi_ptr_segment(block); - mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(_mi_thread_id() == segment->thread_id); - mi_page_t* const page = _mi_segment_page_of(segment, block); + mi_page_t* const page = mi_checked_ptr_page(block,"_mi_free_delayed_block"); + mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page)); // Clear the no-delayed flag so delayed freeing is used again for this page. // This must be done before collecting the free lists on this page -- otherwise @@ -242,20 +223,19 @@ static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block } // Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) -static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block) +static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block) { - // first see if the segment was abandoned and if we can reclaim it into our thread - if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 && - #if MI_HUGE_PAGE_ABANDON - segment->page_kind != MI_PAGE_HUGE && - #endif - mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned? 
+ // first see if the page was abandoned and if we can reclaim it into our thread + if (mi_page_is_abandoned(page) && + (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 || + mi_page_is_singleton(page) // only one block, and we are free-ing it + ) && mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initalized (issue #944)) { - // the segment is abandoned, try to reclaim it into our heap - if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) { - mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); - mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc); + // the page is abandoned, try to reclaim it into our heap + if (_mi_heap_try_reclaim(mi_heap_get_default(), page)) { // TODO: avoid putting it in the full free queue + mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page)); + // mi_assert_internal(mi_heap_get_default()->tld->subproc == page->subproc); mi_free(block); // recursively free as now it will be a local free in our heap return; } @@ -272,17 +252,12 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection _mi_padding_shrink(page, block, sizeof(mi_block_t)); - if (segment->page_kind == MI_PAGE_HUGE) { - #if MI_HUGE_PAGE_ABANDON - // huge page segments are always abandoned and can be freed immediately - _mi_segment_huge_page_free(segment, page, block); - return; - #else + if (mi_page_is_huge(page)) { + mi_assert_internal(mi_page_is_singleton(page)); // huge pages are special as they occupy the entire segment // as these are large we reset the memory occupied by the page so it is available to other threads // (as the owning thread needs to actually free the memory later). 
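The multi-clause condition above reads more easily as a predicate; a sketch only, using the names that appear in this patch (a singleton page is always worth reclaiming here because freeing its only block makes the whole page free):

  static bool mi_want_reclaim_on_free(mi_page_t* page) {
    if (!mi_page_is_abandoned(page)) return false;        // page must have no owning thread
    if (mi_prim_get_default_heap() == (mi_heap_t*)&_mi_heap_empty) return false;  // this thread already exited (issue #944)
    return (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0   // reclaim-on-free enabled, or ...
            || mi_page_is_singleton(page));                // ... we are freeing the page's only block
  }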
- _mi_segment_huge_page_reset(segment, page, block); - #endif + _mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL); // resets conservatively } else { #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading @@ -316,9 +291,8 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p } static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { - const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); - if mi_unlikely(segment==NULL) return 0; - const mi_page_t* const page = _mi_segment_page_of(segment, p); + const mi_page_t* const page = mi_checked_ptr_page(p,msg); + if mi_unlikely(page==NULL) return 0; if mi_likely(!mi_page_has_aligned(page)) { const mi_block_t* block = (const mi_block_t*)p; return mi_page_usable_size_of(page, block); @@ -514,20 +488,20 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) + #if (MI_STAT < 2) MI_UNUSED(block); -#endif + #endif mi_heap_t* const heap = mi_heap_get_default(); const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) + #if (MI_STAT>1) const size_t usize = mi_page_usable_size_of(page, block); mi_heap_stat_decrease(heap, malloc, usize); -#endif - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + #endif + if (bsize <= MI_LARGE_MAX_OBJ_SIZE) { mi_heap_stat_decrease(heap, normal, bsize); -#if (MI_STAT > 1) + #if (MI_STAT > 1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); -#endif + #endif } else { const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc diff --git a/src/heap.c b/src/heap.c index 581b3f71..e4955ba7 100644 --- a/src/heap.c +++ b/src/heap.c @@ -7,11 +7,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc/internal.h" -#include "mimalloc/atomic.h" #include "mimalloc/prim.h" // mi_prim_get_default_heap -#include // memset, memcpy - #if defined(_MSC_VER) && (_MSC_VER < 1920) #pragma warning(disable:4204) // non-constant aggregate initializer #endif @@ -258,7 +255,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(heap != NULL); mi_assert_internal(mi_heap_is_initialized(heap)); // TODO: copy full empty heap instead? - memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); + _mi_memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages)); heap->thread_delayed_free = NULL; heap->page_count = 0; diff --git a/src/os.c b/src/os.c index 36b167cb..83521766 100644 --- a/src/os.c +++ b/src/os.c @@ -59,6 +59,10 @@ size_t _mi_os_large_page_size(void) { return (mi_os_mem_config.large_page_size != 0 ? 
mi_os_mem_config.large_page_size : _mi_os_page_size()); } +size_t _mi_os_virtual_address_bits(void) { + return mi_os_mem_config.virtual_address_bits; +} + bool _mi_os_use_large_page(size_t size, size_t alignment) { // if we have access, check the size and alignment requirements if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false; @@ -103,58 +107,10 @@ static void* mi_align_down_ptr(void* p, size_t alignment) { return (void*)_mi_align_down((uintptr_t)p, alignment); } - -/* ----------------------------------------------------------- - aligned hinting --------------------------------------------------------------- */ - -// On systems with enough virtual address bits, we can do efficient aligned allocation by using -// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address -// space (64TiB) we use this technique. (but see issue #939) -#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT) -static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; - -// Return a MI_SEGMENT_SIZE aligned address that is probably available. -// If this returns NULL, the OS will determine the address but on some OS's that may not be -// properly aligned which can be more costly as it needs to be adjusted afterwards. -// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; -// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses -// in the middle of the 2TiB - 6TiB address range (see issue #372)) - -#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start -#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) -#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) - -void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) -{ - if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if (mi_os_mem_config.virtual_address_bits < 46) return NULL; // < 64TiB virtual address space - size = _mi_align_up(size, MI_SEGMENT_SIZE); - if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096. - #if (MI_SECURE>0) - size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. 
- #endif - - uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); - if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize - uintptr_t init = MI_HINT_BASE; - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); - init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB - #endif - uintptr_t expected = hint + size; - mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); - hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all - } - if (hint%try_alignment != 0) return NULL; - return (void*)hint; -} -#else void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { MI_UNUSED(try_alignment); MI_UNUSED(size); return NULL; } -#endif /* ----------------------------------------------------------- @@ -380,12 +336,10 @@ void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { ----------------------------------------------------------- */ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) { - mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); *memid = _mi_memid_none(); if (stats == NULL) stats = &_mi_stats_main; - if (offset > MI_SEGMENT_SIZE) return NULL; if (offset == 0) { // regular aligned allocation return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats); @@ -605,7 +559,6 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { #endif } end = start + size; - mi_assert_internal(end % MI_SEGMENT_SIZE == 0); } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end)); if (total_size != NULL) *total_size = size; diff --git a/src/page-map.c b/src/page-map.c new file mode 100644 index 00000000..d3fcef79 --- /dev/null +++ b/src/page-map.c @@ -0,0 +1,90 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2023-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/
+
+#include "mimalloc.h"
+#include "mimalloc/internal.h"
+#include "bitmap.h"
+
+mi_decl_cache_align signed char* _mi_page_map = NULL;
+static bool mi_page_map_all_committed = false;
+static size_t mi_blocks_per_commit_bit = 1;
+static mi_memid_t mi_page_map_memid;
+static mi_bitmap_t mi_page_map_commit;
+
+static bool mi_page_map_init(void) {
+  size_t vbits = _mi_os_virtual_address_bits();
+  if (vbits >= 48) vbits = 47;
+  // 1 byte per block = 2 GiB for 128 TiB address space (48 bit = 256 TiB address space)
+  // 64 KiB for 4 GiB address space (on 32-bit)
+  const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_BLOCK_SHIFT));
+
+  const size_t min_commit_size = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
+  mi_blocks_per_commit_bit = mi_block_count_of_size(min_commit_size);
+
+  mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems
+  _mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 0, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
+  if (_mi_page_map==NULL) {
+    _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
+    return false;
+  }
+  if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
+    _mi_warning_message("the page map was committed on-demand but not zero initialized!\n");
+    _mi_memzero_aligned(_mi_page_map, page_map_size);
+  }
+  return true;
+}
+
+static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* block_count) {
+  size_t page_size;
+  *page_start = mi_page_area(page, &page_size);
+  if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE; } // furthest interior pointer
+  *block_count = mi_block_count_of_size(page_size);
+  return ((uintptr_t)*page_start >> MI_ARENA_BLOCK_SHIFT);
+}
+
+
+
+void _mi_page_map_register(mi_page_t* page) {
+  if mi_unlikely(_mi_page_map == NULL) {
+    if (!mi_page_map_init()) return;
+  }
+  uint8_t* page_start;
+  size_t block_count;
+  const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);
+
+  // is the page map area that contains the page address committed?
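For scale (illustrative only, not part of the patch): the comments in mi_page_map_init imply 64 KiB arena blocks (MI_ARENA_BLOCK_SHIFT == 16), so with the subtraction in the shift the reservation is exactly one map byte per block:

  static size_t mi_page_map_size_for(size_t vbits) {
    return ((size_t)1 << (vbits - 16));   // vbits==47 -> 2 GiB, vbits==32 -> 64 KiB
  }

Only part of this reservation needs to be committed at any time; the `mi_page_map_commit` bitmap tracks commit state at a granularity of `mi_blocks_per_commit_bit`.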
+ if (!mi_page_map_all_committed) { + const size_t commit_bit_count = _mi_divide_up(block_count, mi_blocks_per_commit_bit); + const size_t commit_bit_idx = idx / mi_blocks_per_commit_bit; + for (size_t i = 0; i < commit_bit_count; i++) { // per bit to avoid crossing over bitmap chunks + if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, commit_bit_idx + i, 1)) { + // this may race, in which case we do multiple commits (which is ok) + _mi_os_commit(page_start + (i*mi_blocks_per_commit_bit*MI_ARENA_BLOCK_SIZE), mi_blocks_per_commit_bit* MI_ARENA_BLOCK_SIZE, NULL, NULL); + mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, commit_bit_idx + i, 1, NULL); + } + } + } + + // set the offsets + for (int i = 0; i < block_count; i++) { + mi_assert_internal(i < 128); + _mi_page_map[idx + i] = (int8_t)(-i-1); + } +} + + +void _mi_page_map_unregister(mi_page_t* page) { + mi_assert_internal(_mi_page_map != NULL); + + // get index and count + uint8_t* page_start; + size_t block_count; + const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count); + + // unset the offsets + _mi_memzero(_mi_page_map + idx, block_count); +} diff --git a/src/page.c b/src/page.c index c681d6d0..a00ff615 100644 --- a/src/page.c +++ b/src/page.c @@ -59,7 +59,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) { static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { size_t psize; - uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize); + uint8_t* page_area = mi_page_area(page, &psize); mi_block_t* start = (mi_block_t*)page_area; mi_block_t* end = (mi_block_t*)(page_area + psize); while(p != NULL) { @@ -83,10 +83,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->capacity <= page->reserved); // const size_t bsize = mi_page_block_size(page); - mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = mi_page_start(page); - mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); - mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -122,15 +119,11 @@ bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(page->keys[0] != 0); #endif if (mi_page_heap(page)!=NULL) { - mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); - #if MI_HUGE_PAGE_ABANDON - if (segment->page_kind != MI_PAGE_HUGE) - #endif + mi_assert_internal(!_mi_process_is_initialized || page->thread_id == mi_page_heap(page)->thread_id || page->thread_id==0); { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_MAX_OBJ_SIZE || mi_page_is_in_full(page)); mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } @@ -274,16 +267,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size #if !MI_HUGE_PAGE_ABANDON mi_assert_internal(pq != NULL); mi_assert_internal(mi_heap_contains_queue(heap, pq)); - mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size); + 
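The register loop in src/page-map.c above stores, in every page-map byte covered by a page, the negated block offset minus one back to the page's first block (and unregister zeroes the bytes again). A hedged sketch of a matching lookup follows; the real `_mi_ptr_page` lives in the internal headers (and the debug check in free.c compares against `MI_PAGE_PTR_INVALID` rather than NULL), so the actual code likely differs in details. It assumes the page meta-data sits at the start of the page's first arena block:

  static inline mi_page_t* mi_ptr_page_sketch(const void* p) {
    const size_t    idx = (size_t)((uintptr_t)p >> MI_ARENA_BLOCK_SHIFT);
    const ptrdiff_t ofs = _mi_page_map[idx];      // 0 when unregistered, -(i+1) for the i'th block of a page
    if (ofs == 0) return NULL;                    // not a registered page
    const size_t page_idx = (size_t)((ptrdiff_t)idx + ofs + 1);   // index of the page's first block
    return (mi_page_t*)(uintptr_t)(page_idx << MI_ARENA_BLOCK_SHIFT);
  }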
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size); #endif - mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); + mi_page_t* page = _mi_heap_page_alloc(heap, block_size, page_alignment); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; } - #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - #endif mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); // a fresh page was found, initialize it const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc @@ -384,7 +374,6 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_heap_t* pheap = mi_page_heap(page); // remove from our page list - mi_segments_tld_t* segments_tld = &pheap->tld->segments; mi_page_queue_remove(pq, page); // page is no longer associated with our heap @@ -399,8 +388,8 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #endif // and abandon it - mi_assert_internal(mi_page_heap(page) == NULL); - _mi_segment_page_abandon(page,segments_tld); + mi_assert_internal(mi_page_is_abandoned(page)); + _mi_arena_page_abandon(page,&pheap->tld); } // force abandon a page @@ -411,8 +400,7 @@ void _mi_page_force_abandon(mi_page_t* page) { // ensure this page is no longer in the heap delayed free list _mi_heap_delayed_free_all(heap); - // We can still access the page meta-info even if it is freed as we ensure - // in `mi_segment_force_abandon` that the segment is not freed (yet) + // TODO: can we still access the page meta-info even if it is freed? if (page->capacity == 0) return; // it may have been freed now // and now unlink it from the page queue and abandon (or free) @@ -433,17 +421,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); + mi_heap_t* pheap = mi_page_heap(page); + // no more aligned blocks in here mi_page_set_has_aligned(page, false); // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) - mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments; mi_page_queue_remove(pq, page); // and free it mi_page_set_heap(page,NULL); - _mi_segment_page_free(page, force, segments_tld); + _mi_arena_page_free(page, force, &pheap->tld); } #define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE @@ -474,7 +463,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? 
MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; @@ -639,7 +628,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) size_t page_size; //uint8_t* page_start = - _mi_segment_page_start(_mi_page_segment(page), page, &page_size); + mi_page_area(page, &page_size); mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count @@ -676,15 +665,13 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // Initialize a fresh page static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) { mi_assert(page != NULL); - mi_segment_t* segment = _mi_page_segment(page); - mi_assert(segment != NULL); mi_assert_internal(block_size > 0); // set fields mi_page_set_heap(page, heap); page->block_size = block_size; size_t page_size; - page->page_start = _mi_segment_page_start(segment, page, &page_size); - mi_track_mem_noaccess(page->page_start,page_size); + uint8_t* page_start = mi_page_area(page, &page_size); + mi_track_mem_noaccess(page_start,page_size); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); mi_assert_internal(page->reserved > 0); @@ -692,15 +679,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif - page->free_is_zero = page->is_zero_init; + page->free_is_zero = page->memid.initially_zero; #if MI_DEBUG>2 - if (page->is_zero_init) { + if (page->memid.initially_zero) { mi_track_mem_defined(page->page_start, page_size); - mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size)); + mi_assert_expensive(mi_mem_is_zero(page_start, page_size)); } #endif if (block_size > 0 && _mi_is_power_of_two(block_size)) { - page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size)); + page->block_size_shift = (uint8_t)mi_ctz(block_size); } else { page->block_size_shift = 0; @@ -734,13 +721,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi // search for a best next page to use for at most N pages (often cut short if immediate blocks are available) #define MI_MAX_CANDIDATE_SEARCH (8) -// is the page not yet used up to its reserved space? -static bool mi_page_is_expandable(const mi_page_t* page) { - mi_assert_internal(page != NULL); - mi_assert_internal(page->capacity <= page->reserved); - return (page->capacity < page->reserved); -} - // Find a page with free blocks of `page->block_size`. 
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) @@ -907,7 +887,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a #if MI_HUGE_PAGE_ABANDON mi_page_queue_t* pq = NULL; #else - mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size + mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_MAX_OBJ_SIZE+1); // always in the huge queue regardless of the block size mi_assert_internal(mi_page_queue_is_huge(pq)); #endif mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment); @@ -915,10 +895,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a mi_assert_internal(mi_page_block_size(page) >= size); mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_is_huge(page)); - mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE); - mi_assert_internal(_mi_page_segment(page)->used==1); + mi_assert_internal(mi_page_is_singleton(page)); #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue + mi_assert_internal(mi_page_is_abandoned(page)); mi_page_set_heap(page, NULL); #endif mi_heap_stat_increase(heap, huge, mi_page_block_size(page)); @@ -933,7 +912,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept { // huge allocation? const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { + if mi_unlikely(req_size > (MI_LARGE_MAX_OBJ_SIZE - MI_PADDING_SIZE) || huge_alignment > 0) { if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) { _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); return NULL; diff --git a/src/static.c b/src/static.c index 9e06ce05..b34d5d42 100644 --- a/src/static.c +++ b/src/static.c @@ -20,7 +20,7 @@ terms of the MIT license. A copy of the license can be found in the file // containing the whole library. If it is linked first // it will override all the standard library allocation // functions (on Unix's). -#include "alloc.c" // includes alloc-override.c +#include "alloc.c" // includes alloc-override.c and free.c #include "alloc-aligned.c" #include "alloc-posix.c" #include "arena.c" @@ -31,6 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "options.c" #include "os.c" #include "page.c" // includes page-queue.c +#include "page-map.c" #include "random.c" #include "segment.c" #include "segment-map.c" diff --git a/src/xbitmap.c b/src/xbitmap.c deleted file mode 100644 index 68525c84..00000000 --- a/src/xbitmap.c +++ /dev/null @@ -1,599 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2024 Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -Concurrent bitmap that can set/reset sequences of bits atomically ----------------------------------------------------------------------------- */ - -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "mimalloc/bits.h" -#include "xbitmap.h" - -/* -------------------------------------------------------------------------------- - bfields --------------------------------------------------------------------------------- */ - -static inline size_t mi_bfield_ctz(mi_bfield_t x) { - return mi_ctz(x); -} - -static inline size_t mi_bfield_clz(mi_bfield_t x) { - return mi_clz(x); -} - -// find the least significant bit that is set (i.e. count trailing zero's) -// return false if `x==0` (with `*idx` undefined) and true otherwise, -// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). -static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) { - return mi_bsf(x,idx); -} - -static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) { - return mi_rotr(x,r); -} - -// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0). -static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) { - mi_assert_internal(idx < MI_BFIELD_BITS); - const mi_bfield_t mask = ((mi_bfield_t)1)<bfields[i], idx); -} - -static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) { - mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS); - const size_t i = byte_idx / MI_BFIELD_SIZE; - const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE; - return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx); -} - -// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0) -static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) { - mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS); - mi_assert_internal(n>0); - bool all_transition = true; - bool all_already_xset = true; - size_t idx = cidx % MI_BFIELD_BITS; - size_t field = cidx / MI_BFIELD_BITS; - while (n > 0) { - size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field - if (m > n) { m = n; } - mi_assert_internal(idx + m <= MI_BFIELD_BITS); - mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); - const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask, &already_xset); - all_already_xset = all_already_xset && already_xset; - // next field - field++; - idx = 0; - n -= m; - } - *palready_xset = all_already_xset; - return all_transition; -} - -// Check if a sequence of `n` bits within a chunk are all set/cleared. -static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { - mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS); - mi_assert_internal(n>0); - bool all_xset = true; - size_t idx = cidx % MI_BFIELD_BITS; - size_t field = cidx / MI_BFIELD_BITS; - while (n > 0) { - size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field - if (m > n) { m = n; } - mi_assert_internal(idx + m <= MI_BFIELD_BITS); - mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); - const size_t mask = (m == MI_BFIELD_BITS ? 
~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask); - // next field - field++; - idx = 0; - n -= m; - } - return all_xset; -} - -// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0), -// and false otherwise leaving all bit fields as is. -static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { - mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS); - mi_assert_internal(n>0); - if (n==0) return true; - size_t start_idx = cidx % MI_BFIELD_BITS; - size_t start_field = cidx / MI_BFIELD_BITS; - size_t end_field = MI_BITMAP_CHUNK_FIELDS; - size_t mask_mid = 0; - size_t mask_end = 0; - - // first field - size_t field = start_field; - size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field - if (m > n) { m = n; } - mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); - mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS); - const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask_start)) return false; - - // done? - n -= m; - if (n==0) return true; - - // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields - - // mid fields - while (n >= MI_BFIELD_BITS) { - field++; - mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); - mask_mid = ~MI_ZU(0); - if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore; - n -= MI_BFIELD_BITS; - } - - // last field - if (n > 0) { - mi_assert_internal(n < MI_BFIELD_BITS); - field++; - mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); - end_field = field; - mask_end = (MI_ZU(1)<bfields[field], mask_end)) goto restore; - } - - return true; - -restore: - // field is on the field that failed to set atomically; we need to restore all previous fields - mi_assert_internal(field > start_field); - while( field > start_field) { - field--; - const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid)); - bool already_xset; - mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset); - } - return false; -} - - -// find least 1-bit in a chunk and try unset it atomically -// set `*pidx` to thi bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. -// todo: try neon version -static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) { - #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) - while(true) { - const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); - if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ? - const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? 
-1 : 0) - const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0) - mi_assert_internal(mask != 0); - const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24 - mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS); - size_t cidx; - if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set - if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically - *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx; - mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); - return true; - } - } - // try again - } - #else - size_t idx; - for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { - size_t idx; - if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit - if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically - *pidx = (i*MI_BFIELD_BITS + idx); - mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); - return true; - } - } - } - return false; - #endif -} - - -// find least byte in a chunk with all bits set, and try unset it atomically -// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. -// todo: try neon version -static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) { - #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) - while(true) { - const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); - const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0) - const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte - if (mask == 0) return false; - const size_t i = _tzcnt_u32(mask); - mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS); - const size_t chunk_idx = i / MI_BFIELD_SIZE; - const size_t byte_idx = i % MI_BFIELD_SIZE; - if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically - *pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8); - mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); - return true; - } - // try again - } - #else - size_t idx; - for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { - const mi_bfield_t x = chunk->bfields[i]; - // has_set8 has low bit in each byte set if the byte in x == 0xFF - const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F - (x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80 - >> 7; // shift high bit to low bit - size_t idx; - if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit - mi_assert_internal(idx <= (MI_BFIELD_BITS - 8)); - mi_assert_internal((idx%8)==0); - const size_t byte_idx = idx/8; - if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically - *pidx = (i*MI_BFIELD_BITS) + idx; - mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS); - return true; - } - // else continue - } - } - return false; - #endif -} - - -// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically -// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success. -// todo: try avx2 and neon version -// todo: allow spanning across bfield boundaries? -static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) { - if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger? 
- const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1); - for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { - mi_bfield_t b = chunk->bfields[i]; - size_t bshift = 0; - size_t idx; - while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit - b >>= idx; - bshift += idx; - if (bshift + n >= MI_BFIELD_BITS) break; - - if ((b&mask) == mask) { // found a match - mi_assert_internal( ((mask << bshift) >> bshift) == mask ); - if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<bfields[i] >> bshift); - } - } - else { - // advance - const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask) - mi_assert_internal(ones>0); - bshift += ones; - b >>= ones; - } - } - } - return false; -} - - -// are all bits in a bitmap chunk set? -static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) { - #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) - const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); - return _mm256_test_all_ones(vec); - #else - // written like this for vectorization - mi_bfield_t x = chunk->bfields[0]; - for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) { - x = x & chunk->bfields[i]; - } - return (~x == 0); - #endif -} - -// are all bits in a bitmap chunk clear? -static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) { - #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) - const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); - return _mm256_testz_si256( vec, vec ); - #else - // written like this for vectorization - mi_bfield_t x = chunk->bfields[0]; - for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) { - x = x | chunk->bfields[i]; - } - return (x == 0); - #endif -} - -/* -------------------------------------------------------------------------------- - bitmap --------------------------------------------------------------------------------- */ -// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true -void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) { - if (!already_zero) { - _mi_memzero_aligned(bitmap, sizeof(*bitmap)); - } -} - -// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread. -void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { - mi_assert_internal(n>0); - mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS); - - // first chunk - size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - size_t m = MI_BITMAP_CHUNK_BITS - cidx; - if (m > n) { m = n; } - bool already_xset; - mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset); - - // n can be large so use memset for efficiency for all in-between chunks - chunk_idx++; - n -= m; - const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS; - if (mid_chunks > 0) { - _mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), MI_BITMAP_CHUNK_BITS/8); - chunk_idx += mid_chunks; - n -= mid_chunks * MI_BITMAP_CHUNK_BITS; - } - - // last chunk - if (n > 0) { - mi_assert_internal(n < MI_BITMAP_CHUNK_BITS); - mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS); - mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset); - } -} - - -// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0), -// and false otherwise leaving the bitmask as is. 
-bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { - mi_assert_internal(idx < MI_BITMAP_MAX_BITS); - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx); -} - -// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0) -// and false otherwise leaving the bitmask as is. -bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { - mi_assert_internal(idx < MI_BITMAP_MAX_BITS); - mi_assert_internal(idx%8 == 0); - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8; - return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx); -} - -// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's) -// and false otherwise leaving the bitmask as is. -// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! -bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { - mi_assert_internal(n>0); - mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); - if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); } - if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); } - - mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS); - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) - if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia - return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n); -} - -// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). -// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! -bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) { - mi_assert_internal(n>0); - mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); - bool local_already_xset; - if (already_xset==NULL) { already_xset = &local_already_xset; } - // if (n==1) { return mi_bitmap_xset(set, bitmap, idx); } - // if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); } - - mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS); - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) - if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia - return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset); -} - -// Is a sequence of n bits already all set/cleared? 
-bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { - mi_assert_internal(n>0); - mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); - mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS); - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) - if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia - return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n); -} - - -#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \ - { size_t _set_idx; \ - size_t _start = start % MI_BFIELD_BITS; \ - mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \ - while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \ - decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS; - -#define mi_bitmap_forall_set_chunks_end() \ - _start += _set_idx+1; /* so chunk_idx stays valid */ \ - _any_set >>= _set_idx; /* skip scanned bits (and avoid UB with (idx+1)) */ \ - _any_set >>= 1; \ - } \ - } - -// Find a set bit in a bitmap and atomically unset it. Returns true on success, -// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`. -// The low `MI_BFIELD_BITS` of start are used to set the start point of the search -// (to reduce thread contention). -bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) { - mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) - { - size_t cidx; - if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) { - *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx; - mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS); - return true; - } - else { - // we may find that all are unset only on a second iteration but that is ok as - // _any_set is a conservative approximation. - if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { - mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx); - } - } - } - mi_bitmap_forall_set_chunks_end(); - return false; -} - - -// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero. -// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`. -bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) { - mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) - { - size_t cidx; - if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) { - *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx; - mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8); - mi_assert_internal((*pidx % 8) == 0); - return true; - } - else { - if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { - mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx); - } - } - } - mi_bitmap_forall_set_chunks_end(); - return false; -} - -// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all. -// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. 
-bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) { - // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger - // TODO: allow spanning across chunk boundaries - if (n == 0 || n > MI_BFIELD_BITS) return false; - mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx) - { - size_t cidx; - if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) { - *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx; - mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n); - return true; - } - else { - if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { - mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx); - } - } - } - mi_bitmap_forall_set_chunks_end(); - return false; -} diff --git a/src/xbitmap.h b/src/xbitmap.h deleted file mode 100644 index 869db2a2..00000000 --- a/src/xbitmap.h +++ /dev/null @@ -1,94 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2023 Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -Concurrent bitmap that can set/reset sequences of bits atomically ----------------------------------------------------------------------------- */ -#pragma once -#ifndef MI_XBITMAP_H -#define MI_XBITMAP_H - -/* -------------------------------------------------------------------------------- - Definitions --------------------------------------------------------------------------------- */ - -typedef size_t mi_bfield_t; - -#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3) -#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT) -#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8) -#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1) -#define MI_BFIELD_LO_BIT8 ((~(mi_bfield_t(0)))/0xFF) // 0x01010101 .. -#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 .. - -#define MI_BITMAP_CHUNK_BITS_SHIFT (8) // 2^8 = 256 bits per chunk -#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT) -#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS) -#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1) - -typedef mi_decl_align(32) struct mi_bitmap_chunk_s { - _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS]; -} mi_bitmap_chunk_t; - - -typedef mi_decl_align(32) struct mi_bitmap_s { - mi_bitmap_chunk_t chunks[MI_BFIELD_BITS]; - _Atomic(mi_bfield_t)any_set; -} mi_bitmap_t; - -#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit - -/* -------------------------------------------------------------------------------- - Bitmap --------------------------------------------------------------------------------- */ - -typedef bool mi_bit_t; -#define MI_BIT_SET (true) -#define MI_BIT_CLEAR (false) - -// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true -void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero); - -// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread. 
-void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n); - -// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's). -// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! -// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared. -bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset); - -// Is a sequence of n bits already all set/cleared? -bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n); - -// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0) -// and false otherwise leaving the bitmask as is. -mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx); - -// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0) -// and false otherwise leaving the bitmask as is. -mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx); - -// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's) -// and false otherwise leaving the bitmask as is. -// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! -mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n); - -// Find a set bit in a bitmap and atomically unset it. Returns true on success, -// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`. -// The low `MI_BFIELD_BITS` of start are used to set the start point of the search -// (to reduce thread contention). -mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start); - -// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero. -// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`. -mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ); - -// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all. -// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. -mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ); - -#endif // MI_XBITMAP_H
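Note on the removed header: the concurrent-bitmap API declared in src/xbitmap.h above is what the reworked src/bitmap.h and src/bitmap.c take over in this patch. The following is a minimal, hypothetical usage sketch of that API, assuming the declarations keep the signatures shown here after the move; it is illustration only and not part of the patch.

// Sketch only: assumes mi_bitmap_init, mi_bitmap_xsetN, MI_BIT_SET and
// mi_bitmap_try_find_and_clearN survive the xbitmap -> bitmap move unchanged.
#include "bitmap.h"   // hypothetical include of the reworked header

static void bitmap_usage_sketch(void) {
  static mi_bitmap_t bitmap;                     // static storage is already zero
  mi_bitmap_init(&bitmap, true /* already_zero */);

  // mark 64 slices as available; the range must stay within one 256-bit chunk
  mi_bitmap_xsetN(MI_BIT_SET, &bitmap, 0, 64, NULL);

  // try to claim 8 contiguous slices: all 8 bits transition 1 -> 0 atomically on success
  size_t idx;
  if (mi_bitmap_try_find_and_clearN(&bitmap, 0 /* start hint */, 8, &idx)) {
    // ... slices [idx, idx+8) are now owned by this thread ...
    mi_bitmap_xsetN(MI_BIT_SET, &bitmap, idx, 8, NULL);   // release them again
  }
}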
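The removed mi_bitmap_try_find_and_clear family further above is a two-level search: a single summary bfield (any_set) records which 256-bit chunks may contain set bits, the mi_bitmap_forall_set_chunks macro rotates that summary by the caller's start hint so concurrent callers begin scanning at different chunks, and summary bits that turn out to be stale are cleared lazily since any_set is only a conservative approximation. The sketch below restates that search order as plain single-threaded C; all names are invented for illustration, and the atomics, the 8-bit and N-bit chunk searches, and the concurrency re-checks of the real code are omitted.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define SK_BFIELD_BITS  64
#define SK_CHUNK_BITS   256

typedef struct sk_bitmap_s {
  uint64_t chunks[SK_BFIELD_BITS][SK_CHUNK_BITS/64];  // 64 chunks of 256 bits each
  uint64_t any_set;                                   // bit i set => chunk i may have set bits
} sk_bitmap_t;

// find the least set bit in a 256-bit chunk and clear it
static bool sk_chunk_find_and_clear(uint64_t* chunk, size_t* cidx) {
  for (size_t f = 0; f < SK_CHUNK_BITS/64; f++) {
    if (chunk[f] != 0) {
      const size_t bit = (size_t)__builtin_ctzll(chunk[f]);
      chunk[f] &= (chunk[f] - 1);                     // clear the least set bit
      *cidx = f*64 + bit;
      return true;
    }
  }
  return false;
}

// two-level scan: rotate the summary by the start hint, visit candidate chunks,
// and drop summary bits for chunks that turn out to be fully clear
static bool sk_try_find_and_clear(sk_bitmap_t* bm, size_t start, size_t* pidx) {
  const size_t rot = start % SK_BFIELD_BITS;
  uint64_t any = (rot == 0 ? bm->any_set
                           : (bm->any_set >> rot) | (bm->any_set << (SK_BFIELD_BITS - rot)));
  size_t base = rot;                                  // maps rotated positions back to chunk indices
  while (any != 0) {
    const size_t i = (size_t)__builtin_ctzll(any);
    const size_t chunk_idx = (i + base) % SK_BFIELD_BITS;
    size_t cidx;
    if (sk_chunk_find_and_clear(bm->chunks[chunk_idx], &cidx)) {
      *pidx = chunk_idx*SK_CHUNK_BITS + cidx;
      return true;
    }
    // single-threaded here the chunk is certainly empty; the concurrent version
    // re-checks mi_bitmap_chunk_all_are_clear before clearing the summary bit
    bm->any_set &= ~((uint64_t)1 << chunk_idx);
    base += i + 1;
    any >>= i; any >>= 1;                             // two shifts avoid UB when i == 63
  }
  return false;
}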