diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5fc1808e..5cb05840 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,7 @@ set(mi_sources
src/options.c
src/os.c
src/page.c
+ src/page-map.c
src/random.c
src/segment.c
src/segment-map.c
diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj
index 138acf39..3dd7326f 100644
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc.vcxproj
  [hunks not recoverable: the MSBuild XML elements were stripped during extraction]
diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters
index 48958be1..2eed7e90 100644
--- a/ide/vs2022/mimalloc.vcxproj.filters
+++ b/ide/vs2022/mimalloc.vcxproj.filters
  [hunks not recoverable: the XML elements were stripped during extraction, leaving only the <Filter>Sources</Filter> values]
diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h
index 642f0f9c..ad7ea3e6 100644
--- a/include/mimalloc/bits.h
+++ b/include/mimalloc/bits.h
@@ -100,6 +100,10 @@ typedef int32_t mi_ssize_t;
#define __BMI1__ 1
#endif
+// Define big endian if needed
+// #define MI_BIG_ENDIAN 1
+
+
/* --------------------------------------------------------------------------------
Builtin's
-------------------------------------------------------------------------------- */
@@ -310,4 +314,6 @@ static inline size_t mi_rotl(size_t x, size_t r) {
#endif
}
+
+
#endif // MI_BITS_H
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index b997099e..2713c0ac 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -108,6 +108,7 @@ size_t _mi_os_page_size(void);
size_t _mi_os_good_alloc_size(size_t size);
bool _mi_os_has_overcommit(void);
bool _mi_os_has_virtual_reserve(void);
+size_t _mi_os_virtual_address_bits(void);
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
@@ -136,12 +137,11 @@ bool _mi_arena_contains(const void* p);
void _mi_arenas_collect(bool force_purge, mi_stats_t* stats);
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);
-bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment);
-void _mi_arena_segment_mark_abandoned(mi_segment_t* segment);
void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid);
void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size);
+/*
typedef struct mi_arena_field_cursor_s { // abstract struct
size_t os_list_count; // max entries to visit in the OS abandoned list
size_t start; // start arena idx (may need to be wrapped)
@@ -154,27 +154,12 @@ typedef struct mi_arena_field_cursor_s { // abstract struct
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current);
+*/
-// "segment-map.c"
-void _mi_segment_map_allocated_at(const mi_segment_t* segment);
-void _mi_segment_map_freed_at(const mi_segment_t* segment);
+// "page-map.c"
+void _mi_page_map_register(mi_page_t* page);
+void _mi_page_map_unregister(mi_page_t* page);
-// "segment.c"
-mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
-void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
-void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
-uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size);
-
-#if MI_HUGE_PAGE_ABANDON
-void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
-#else
-void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
-#endif
-
-void _mi_segments_collect(bool force, mi_segments_tld_t* tld);
-void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
-bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);
-bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
@@ -226,7 +211,7 @@ void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, siz
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
-void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
+// void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);
// "libc.c"
@@ -338,8 +323,8 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
// Align a pointer upwards
-static inline void* mi_align_up_ptr(void* p, size_t alignment) {
- return (void*)_mi_align_up((uintptr_t)p, alignment);
+static inline uint8_t* _mi_align_up_ptr(void* p, size_t alignment) {
+ return (uint8_t*)_mi_align_up((uintptr_t)p, alignment);
}
@@ -445,68 +430,44 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
return heap->pages_free_direct[idx];
}
-// Segment that contains the pointer
-// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
-// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
-// therefore we align one byte before `p`.
-// We check for NULL afterwards on 64-bit systems to improve codegen for `mi_free`.
-static inline mi_segment_t* _mi_ptr_segment(const void* p) {
- mi_segment_t* const segment = (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
- #if MI_INTPTR_SIZE <= 4
- return (p==NULL ? NULL : segment);
- #else
- return ((intptr_t)segment <= 0 ? NULL : segment);
+
+extern signed char* _mi_page_map;
+
+#define MI_PAGE_PTR_INVALID ((mi_page_t*)(1))
+
+static inline mi_page_t* _mi_ptr_page(const void* p) {
+ const uintptr_t up = ((uintptr_t)p) >> MI_ARENA_BLOCK_SHIFT;
+ const ptrdiff_t ofs = _mi_page_map[up];
+ #if MI_DEBUG
+ if mi_unlikely(ofs==0) return MI_PAGE_PTR_INVALID;
#endif
+ return (mi_page_t*)((up + ofs - 1) << MI_ARENA_BLOCK_SHIFT);
}
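
The new `_mi_ptr_page` above replaces the segment lookup: a global byte map, indexed by the address shifted by `MI_ARENA_BLOCK_SHIFT`, stores how far back (in arena blocks) the owning page starts, presumably sized from the new `_mi_os_virtual_address_bits()` so it can cover the whole address space. Since `src/page-map.c` is not shown in this excerpt, the standalone sketch below only illustrates the encoding that `_mi_ptr_page` decodes; the constant and helper names are assumptions, not mimalloc API.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define BLOCK_SHIFT 16                    // assume 64 KiB arena blocks (stand-in for MI_ARENA_BLOCK_SHIFT)

static signed char page_map[1 << 12];     // one signed byte per block; covers a small demo address range

// For every block covered by a page, store the distance back to the page start,
// encoded so that `block + entry - 1` recovers the page's first block.
static void demo_register(uintptr_t page_addr, size_t block_count) {
  const uintptr_t start = page_addr >> BLOCK_SHIFT;
  for (size_t i = 0; i < block_count; i++) {
    page_map[start + i] = (signed char)(1 - (ptrdiff_t)i);   // 1, 0, -1, -2, ...
  }
}

// Same arithmetic as `_mi_ptr_page`: map any interior pointer to its page start.
static uintptr_t demo_ptr_page(uintptr_t p) {
  const uintptr_t up  = p >> BLOCK_SHIFT;
  const ptrdiff_t ofs = page_map[up];
  return (up + ofs - 1) << BLOCK_SHIFT;
}

int main(void) {
  demo_register(0x30000, 4);                                   // a page spanning 4 blocks at 0x30000
  printf("0x%lx\n", (unsigned long)demo_ptr_page(0x4abc0));    // prints 0x30000
  return 0;
}
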
-// Segment belonging to a page
-static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
- mi_assert_internal(page!=NULL);
- mi_segment_t* segment = _mi_ptr_segment(page);
- mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]);
- return segment;
-}
-// used internally
-static inline size_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) {
- // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
- ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
- mi_assert_internal(diff >= 0 && (size_t)diff <= MI_SEGMENT_SIZE /* for huge alignment it can be equal */);
- size_t idx = (size_t)diff >> segment->page_shift;
- mi_assert_internal(idx < segment->capacity);
- mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);
- return idx;
-}
-
-// Get the page containing the pointer
-static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
- size_t idx = _mi_segment_page_idx_of(segment, p);
- return &((mi_segment_t*)segment)->pages[idx];
-}
-
-// Quick page start for initialized pages
-static inline uint8_t* mi_page_start(const mi_page_t* page) {
- mi_assert_internal(page->page_start != NULL);
- mi_assert_expensive(_mi_segment_page_start(_mi_page_segment(page),page,NULL) == page->page_start);
- return page->page_start;
-}
-
-// Get the page containing the pointer
-static inline mi_page_t* _mi_ptr_page(void* p) {
- mi_assert_internal(p!=NULL);
- return _mi_segment_page_of(_mi_ptr_segment(p), p);
-}
-
-// Get the block size of a page (special case for huge objects)
+// Get the block size of a page
static inline size_t mi_page_block_size(const mi_page_t* page) {
mi_assert_internal(page->block_size > 0);
return page->block_size;
}
-static inline bool mi_page_is_huge(const mi_page_t* page) {
- mi_assert_internal((page->is_huge && _mi_page_segment(page)->page_kind == MI_PAGE_HUGE) ||
- (!page->is_huge && _mi_page_segment(page)->page_kind != MI_PAGE_HUGE));
- return page->is_huge;
+// Page start
+static inline uint8_t* mi_page_start(const mi_page_t* page) {
+ mi_assert(sizeof(mi_page_t) <= MI_PAGE_INFO_SIZE);
+ return (uint8_t*)page + MI_PAGE_INFO_SIZE;
+}
+
+static inline uint8_t* mi_page_area(const mi_page_t* page, size_t* size) {
+ if (size) { *size = mi_page_block_size(page) * page->reserved; }
+ return mi_page_start(page);
+}
+
+static inline bool mi_page_is_in_arena(const mi_page_t* page) {
+ return (page->memid.memkind == MI_MEM_ARENA);
+}
+
+static inline bool mi_page_is_singleton(const mi_page_t* page) {
+ return (page->reserved == 1);
}
// Get the usable block size of a page without fixed padding.
@@ -515,11 +476,6 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
return mi_page_block_size(page) - MI_PADDING_SIZE;
}
-// size of a segment
-static inline size_t mi_segment_size(mi_segment_t* segment) {
- return segment->segment_size;
-}
-
// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
@@ -534,10 +490,20 @@ static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
}
+static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
+ return mi_atomic_load_relaxed(&page->xthread_id);
+}
+
static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
- if (heap != NULL) { page->heap_tag = heap->tag; }
+ if (heap != NULL) {
+ page->heap_tag = heap->tag;
+ mi_atomic_store_release(&page->xthread_id, heap->thread_id);
+ }
+ else {
+ mi_atomic_store_release(&page->xthread_id,0);
+ }
}
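
With `xthread_id` mirrored onto the page (and reset to 0 on abandonment), ownership tests no longer need to find a segment. A hedged sketch of the kind of check the free path can now make; `my_page_is_local` is made up, while `_mi_thread_id()` is the existing mimalloc primitive:

// hypothetical: does the current thread own this page? (an abandoned page, id 0, never matches)
static inline bool my_page_is_local(const mi_page_t* page) {
  return (mi_page_thread_id(page) == _mi_thread_id());
}
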
// Thread free flag helpers
@@ -576,6 +542,21 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) {
return (page->free != NULL);
}
+
+// is the page not yet used up to its reserved space?
+static inline bool mi_page_is_expandable(const mi_page_t* page) {
+ mi_assert_internal(page != NULL);
+ mi_assert_internal(page->capacity <= page->reserved);
+ return (page->capacity < page->reserved);
+}
+
+
+static inline bool mi_page_is_full(mi_page_t* page) {
+ bool full = (page->reserved == page->used);
+ mi_assert_internal(!full || page->free == NULL);
+ return full;
+}
+
// is more than 7/8th of a page in use?
static inline bool mi_page_mostly_used(const mi_page_t* page) {
if (page==NULL) return true;
@@ -583,6 +564,15 @@ static inline bool mi_page_mostly_used(const mi_page_t* page) {
return (page->reserved - page->used <= frac);
}
+static inline bool mi_page_is_abandoned(mi_page_t* page) {
+ return (mi_page_thread_id(page) == 0);
+}
+
+static inline bool mi_page_is_huge(mi_page_t* page) {
+ return (page->block_size > MI_LARGE_MAX_OBJ_SIZE);
+}
+
+
static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
}
@@ -667,17 +657,8 @@ We also pass a separate `null` value to be used as `NULL` or otherwise
  [hunk body not recoverable]
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
-#if (MI_LARGE_OBJ_WSIZE_MAX >= 655360)
-#error "mimalloc internal: define more bins"
-#endif
-
-// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`)
-#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX)
-
-// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
-#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
+// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated orphan pages
+#define MI_BLOCK_ALIGNMENT_MAX (MI_ARENA_BLOCK_ALIGN)
 // We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
-#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
+#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
+
+
+// ---------------------------------------------------------------
+// a memory id tracks the provenance of arena/OS allocated memory
+// ---------------------------------------------------------------
+
+// Memory can reside in arenas, be allocated directly from the OS, or be statically allocated. The memid keeps track of this.
+typedef enum mi_memkind_e {
+ MI_MEM_NONE, // not allocated
+ MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
+ MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
+ MI_MEM_OS, // allocated from the OS
+ MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
+  MI_MEM_OS_REMAP,      // allocated in a remappable area (i.e. using `mremap`)
+ MI_MEM_ARENA // allocated from an arena (the usual case)
+} mi_memkind_t;
+
+static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
+ return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
+}
+
+typedef struct mi_memid_os_info {
+ void* base; // actual base address of the block (used for offset aligned allocations)
+ size_t alignment; // alignment at allocation
+} mi_memid_os_info_t;
+
+typedef struct mi_memid_arena_info {
+ size_t block_index; // index in the arena
+ mi_arena_id_t id; // arena id (>= 1)
+ bool is_exclusive; // this arena can only be used for specific arena allocations
+} mi_memid_arena_info_t;
+
+typedef struct mi_memid_s {
+ union {
+ mi_memid_os_info_t os; // only used for MI_MEM_OS
+ mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
+ } mem;
+  bool          is_pinned;              // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2MiB) or huge (1GiB) OS pages)
+ bool initially_committed;// `true` if the memory was originally allocated as committed
+ bool initially_zero; // `true` if the memory was originally zero initialized
+ mi_memkind_t memkind;
+} mi_memid_t;
+
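
The memid records where each block of memory came from, and the free path dispatches on exactly that. A hedged sketch of the dispatch (it mirrors `_mi_arena_free` further down in this diff; the helper name is made up):

// hypothetical dispatch on provenance when releasing memory
static void my_release_by_provenance(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) {
  if (mi_memkind_is_os(memid.memkind)) {
    _mi_os_free(p, size, memid, stats);     // plain, huge, or remappable OS memory goes back to the OS
  }
  else if (memid.memkind == MI_MEM_ARENA) {
    // return the blocks to the owning arena, located via memid.mem.arena.id / block_index
  }
  else {
    // MI_MEM_NONE, MI_MEM_EXTERNAL, MI_MEM_STATIC: nothing to free
  }
}
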
// ------------------------------------------------------
// Mimalloc pages contain allocated blocks
@@ -223,6 +248,10 @@ typedef union mi_page_flags_s {
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
typedef uintptr_t mi_thread_free_t;
+// Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython)
+typedef struct mi_subproc_s mi_subproc_t;
+
+
// A page contains blocks of one specific size (`block_size`).
// Each page has three list of free blocks:
// `free` for blocks that can be allocated,
@@ -242,8 +271,6 @@ typedef uintptr_t mi_thread_free_t;
// Notes:
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Using `uint16_t` does not seem to slow things down
-// - The size is 10 words on 64-bit which helps the page index calculations
-// (and 12 words on 32-bit, and encoded free lists add 2 words)
// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
@@ -252,15 +279,8 @@ typedef uintptr_t mi_thread_free_t;
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
- // "owned" by the segment
- uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
- uint8_t segment_in_use:1; // `true` if the segment allocated this page
- uint8_t is_committed:1; // `true` if the page virtual memory is committed
- uint8_t is_zero_init:1; // `true` if the page was initially zero initialized
- uint8_t is_huge:1; // `true` if the page is in a huge segment
-
- // layout like this to optimize access in `mi_malloc` and `mi_free`
- uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
+ mi_memid_t memid; // provenance of the page memory
+ uint16_t capacity; // number of blocks committed (must be the first field for proper zero-initialisation)
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized
@@ -272,120 +292,54 @@ typedef struct mi_page_s {
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type
// padding
- size_t block_size; // size available in each block (always `>0`)
- uint8_t* page_start; // start of the page area containing the blocks
+ size_t block_size; // size available in each block (always `>0`)
#if (MI_ENCODE_FREELIST || MI_PADDING)
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
#endif
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
- _Atomic(uintptr_t) xheap;
+  _Atomic(uintptr_t)         xheap;               // heap this page belongs to.
+  _Atomic(mi_threadid_t)     xthread_id;          // thread this page belongs to (= xheap->thread_id, or 0 if abandoned)
struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
-
- #if MI_INTPTR_SIZE==4 // pad to 12 words on 32-bit
- void* padding[1];
- #endif
} mi_page_t;
+// ------------------------------------------------------
+// Object sizes
+// ------------------------------------------------------
+
+#define MI_PAGE_ALIGN (64)
+#define MI_PAGE_INFO_SIZE (MI_SIZE_SHIFT*MI_PAGE_ALIGN) // should be > sizeof(mi_page_t)
+
+// The max object sizes are checked so that we do not waste more than 12.5% internally over the page sizes.
+// (Except for large pages since huge objects are allocated in 4MiB chunks)
+#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4) // ~16KiB
+#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4) // ~128KiB
+#define MI_LARGE_MAX_OBJ_SIZE ((MI_LARGE_PAGE_SIZE-MI_PAGE_INFO_SIZE)/2) // ~2MiB
+#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)
+
+
+#if (MI_LARGE_MAX_OBJ_WSIZE >= 655360)
+#error "mimalloc internal: define more bins"
+#endif
+
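
Plugging in the page sizes quoted in the page-kind comments below (64KiB, 512KiB, 4MiB) with `MI_SIZE_SHIFT == 3` on 64-bit reproduces the bounds in the comments above; a quick standalone check (the literal sizes are assumptions of this sketch):

#include <stdio.h>
int main(void) {
  const unsigned long info   = 3*64;                       // MI_PAGE_INFO_SIZE = MI_SIZE_SHIFT*MI_PAGE_ALIGN on 64-bit
  const unsigned long small  = (64UL*1024     - info)/4;   // 16336   (~16 KiB)
  const unsigned long medium = (512UL*1024    - info)/4;   // 131024  (~128 KiB)
  const unsigned long large  = (4UL*1024*1024 - info)/2;   // 2097056 (~2 MiB)
  printf("small %lu, medium %lu, large %lu, large wsize %lu\n", small, medium, large, large/8);
  return 0;  // large/8 = 262132 < 655360, so the #error above does not trigger
}
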
// ------------------------------------------------------
-// Mimalloc segments contain mimalloc pages
+// Page kinds
// ------------------------------------------------------
typedef enum mi_page_kind_e {
- MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
- MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment
- MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment
- MI_PAGE_HUGE // a huge page is a single page in a segment of variable size (but still 2MiB aligned)
+ MI_PAGE_SMALL, // small blocks go into 64KiB pages
+ MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages
+ MI_PAGE_LARGE, // larger blocks go into 4MiB pages
+ MI_PAGE_SINGLETON // page containing a single block.
// used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an aligment `> MI_BLOCK_ALIGNMENT_MAX`.
} mi_page_kind_t;
-// ---------------------------------------------------------------
-// a memory id tracks the provenance of arena/OS allocated memory
-// ---------------------------------------------------------------
-
-// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
-typedef enum mi_memkind_e {
- MI_MEM_NONE, // not allocated
- MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
- MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
- MI_MEM_OS, // allocated from the OS
- MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
- MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`)
- MI_MEM_ARENA // allocated from an arena (the usual case)
-} mi_memkind_t;
-
-static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
- return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
-}
-
-typedef struct mi_memid_os_info {
- void* base; // actual base address of the block (used for offset aligned allocations)
- size_t alignment; // alignment at allocation
-} mi_memid_os_info_t;
-
-typedef struct mi_memid_arena_info {
- size_t block_index; // index in the arena
- mi_arena_id_t id; // arena id (>= 1)
- bool is_exclusive; // this arena can only be used for specific arena allocations
-} mi_memid_arena_info_t;
-
-typedef struct mi_memid_s {
- union {
- mi_memid_os_info_t os; // only used for MI_MEM_OS
- mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
- } mem;
- bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages)
- bool initially_committed;// `true` if the memory was originally allocated as committed
- bool initially_zero; // `true` if the memory was originally zero initialized
- mi_memkind_t memkind;
-} mi_memid_t;
-
-
-// ---------------------------------------------------------------
-// Segments contain mimalloc pages
-// ---------------------------------------------------------------
-typedef struct mi_subproc_s mi_subproc_t;
-
-// Segments are large allocated memory blocks (2MiB on 64 bit) from the OS.
-// Inside segments we allocated fixed size _pages_ that contain blocks.
-typedef struct mi_segment_s {
- // constant fields
- mi_memid_t memid; // memory id to track provenance
- bool allow_decommit;
- bool allow_purge;
- size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE`
- mi_subproc_t* subproc; // segment belongs to sub process
-
- // segment fields
- struct mi_segment_s* next; // must be the first (non-constant) segment field -- see `segment.c:segment_init`
- struct mi_segment_s* prev;
- bool was_reclaimed; // true if it was reclaimed (used to limit reclaim-on-free reclamation)
- bool dont_free; // can be temporarily true to ensure the segment is not freed
-
- size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
- size_t abandoned_visits; // count how often this segment is visited for reclaiming (to force reclaim if it is too long)
-
- size_t used; // count of pages in use (`used <= capacity`)
- size_t capacity; // count of available pages (`#free + used`)
- size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages.
- uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
-
- struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arena's, and only if `mi_option_visit_abandoned` is enabled
- struct mi_segment_s* abandoned_os_prev;
-
- // layout like this to optimize access in `mi_free`
- _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
- size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
- mi_page_kind_t page_kind; // kind of pages: small, medium, large, or huge
- mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
-} mi_segment_t;
-
// ------------------------------------------------------
// Heaps
@@ -522,21 +476,18 @@ typedef struct mi_stat_counter_s {
} mi_stat_counter_t;
typedef struct mi_stats_s {
- mi_stat_count_t segments;
mi_stat_count_t pages;
mi_stat_count_t reserved;
mi_stat_count_t committed;
mi_stat_count_t reset;
mi_stat_count_t purged;
mi_stat_count_t page_committed;
- mi_stat_count_t segments_abandoned;
mi_stat_count_t pages_abandoned;
mi_stat_count_t threads;
mi_stat_count_t normal;
mi_stat_count_t huge;
mi_stat_count_t giant;
mi_stat_count_t malloc;
- mi_stat_count_t segments_cache;
mi_stat_counter_t pages_extended;
mi_stat_counter_t mmap_calls;
mi_stat_counter_t commit_calls;
@@ -581,12 +532,12 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
// ------------------------------------------------------
struct mi_subproc_s {
- _Atomic(size_t) abandoned_count; // count of abandoned segments for this sub-process
- _Atomic(size_t) abandoned_os_list_count; // count of abandoned segments in the os-list
- mi_lock_t abandoned_os_lock; // lock for the abandoned os segment list (outside of arena's) (this lock protect list operations)
+ _Atomic(size_t) abandoned_count; // count of abandoned pages for this sub-process
+ _Atomic(size_t) abandoned_os_list_count; // count of abandoned pages in the os-list
+  mi_lock_t         abandoned_os_lock;      // lock for the abandoned OS page list (outside of arenas); this lock protects the list operations
mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list
- mi_segment_t* abandoned_os_list; // doubly-linked list of abandoned segments outside of arena's (in OS allocated memory)
- mi_segment_t* abandoned_os_list_tail; // the tail-end of the list
+  mi_page_t*        abandoned_os_list;      // doubly-linked list of abandoned pages outside of arenas (in OS allocated memory)
+ mi_page_t* abandoned_os_list_tail; // the tail-end of the list
mi_memid_t memid; // provenance of this memory block
};
@@ -597,11 +548,6 @@ struct mi_subproc_s {
// Milliseconds as in `int64_t` to avoid overflows
typedef int64_t mi_msecs_t;
-// Queue of segments
-typedef struct mi_segment_queue_s {
- mi_segment_t* first;
- mi_segment_t* last;
-} mi_segment_queue_t;
// OS thread local data
typedef struct mi_os_tld_s {
@@ -609,28 +555,13 @@ typedef struct mi_os_tld_s {
mi_stats_t* stats; // points to tld stats
} mi_os_tld_t;
-// Segments thread local data
-typedef struct mi_segments_tld_s {
- mi_segment_queue_t small_free; // queue of segments with free small pages
- mi_segment_queue_t medium_free; // queue of segments with free medium pages
- mi_page_queue_t pages_purge; // queue of freed pages that are delay purged
- size_t count; // current number of segments;
- size_t peak_count; // peak number of segments
- size_t current_size; // current size of all segments
- size_t peak_size; // peak size of all segments
- size_t reclaim_count;// number of reclaimed (abandoned) segments
- mi_subproc_t* subproc; // sub-process this thread belongs to.
- mi_stats_t* stats; // points to tld stats
- mi_os_tld_t* os; // points to os tld
-} mi_segments_tld_t;
-
// Thread local data
struct mi_tld_s {
unsigned long long heartbeat; // monotonic heartbeat count
bool recurse; // true if deferred was called; used to prevent infinite recursion.
mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
- mi_segments_tld_t segments; // segment tld
+ mi_subproc_t* subproc; // sub-process this thread belongs to.
mi_os_tld_t os; // os tld
mi_stats_t stats; // statistics
};
diff --git a/src/alloc.c b/src/alloc.c
index a093f108..00f6d1a4 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -82,7 +82,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
#if (MI_STAT>0)
const size_t bsize = mi_page_usable_block_size(page);
- if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
+ if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_increase(heap, normal, bsize);
mi_heap_stat_counter_increase(heap, normal_count, 1);
#if (MI_STAT>1)
diff --git a/src/xarena.c b/src/arena-old.c
similarity index 53%
rename from src/xarena.c
rename to src/arena-old.c
index 42943f84..8ca5aaf3 100644
--- a/src/xarena.c
+++ b/src/arena-old.c
@@ -21,834 +21,46 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
#include "mimalloc.h"
#include "mimalloc/internal.h"
-#include "xbitmap.h"
+#include "mimalloc/atomic.h"
+#include "bitmap.h"
/* -----------------------------------------------------------
Arena allocation
----------------------------------------------------------- */
-#define MI_ARENA_BLOCK_SIZE (MI_SMALL_PAGE_SIZE) // 64KiB
-#define MI_ARENA_BLOCK_ALIGN (MI_ARENA_BLOCK_SIZE) // 64KiB
-#define MI_ARENA_BIN_COUNT (MI_BIN_COUNT)
-
-#define MI_ARENA_MIN_OBJ_SIZE MI_ARENA_BLOCK_SIZE
-#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_CHUNK_BITS * MI_ARENA_BLOCK_SIZE) // for now, cannot cross chunk boundaries
-
// A memory arena descriptor
typedef struct mi_arena_s {
mi_arena_id_t id; // arena id; 0 for non-specific
mi_memid_t memid; // memid of the memory area
- // _Atomic(uint8_t*) start; // the start of the memory area
- // size_t meta_size; // size of the arena structure itself (including its bitmaps)
- // mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
+ _Atomic(uint8_t*)start; // the start of the memory area
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
+ size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
+ size_t meta_size; // size of the arena structure itself (including its bitmaps)
+ mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
int numa_node; // associated NUMA node
bool exclusive; // only allow allocations if specifically for this arena
bool is_large; // memory area consists of large- or huge OS pages (always committed)
mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited
- _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
-
- mi_bitmap_t blocks_free; // is the block free?
- mi_bitmap_t blocks_committed; // is the block committed? (i.e. accessible)
- mi_bitmap_t blocks_purge; // can the block be purged? (block in purge => block in free)
- mi_bitmap_t blocks_dirty; // is the block potentially non-zero?
- mi_bitmap_t blocks_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page)
- // the full queue contains abandoned full pages
+ _Atomic(size_t)search_idx; // optimization to start the search for free blocks
+ _Atomic(mi_msecs_t)purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
+ mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
+ mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
+ mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
+ mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
+ mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
+ // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields.
} mi_arena_t;
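
The struct above ends in a variable-length `blocks_inuse` bitmap, and the trailing comment says the dirty, committed, purge, and abandoned bitmaps follow it in the same allocation. A hedged sketch of how those pointers are presumably wired up at arena creation (illustrative only; the function name is made up):

// hypothetical: point the auxiliary bitmaps into the space that follows blocks_inuse
static void my_arena_wire_bitmaps(mi_arena_t* arena, size_t field_count) {
  mi_bitmap_field_t* base  = &arena->blocks_inuse[0];
  arena->blocks_dirty      = base + 1*field_count;
  arena->blocks_committed  = base + 2*field_count;
  arena->blocks_purge      = base + 3*field_count;
  arena->blocks_abandoned  = base + 4*field_count;
}
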
-#define MI_MAX_ARENAS (1024) // Limited for now (and takes up .bss)
+
+#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN)
+#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB
+#define MI_MAX_ARENAS (132) // Limited as the reservation exponentially increases (and takes up .bss)
// The available arenas
static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0
-
-/* -----------------------------------------------------------
- Arena id's
- id = arena_index + 1
------------------------------------------------------------ */
-
-size_t mi_arena_id_index(mi_arena_id_t id) {
- return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1);
-}
-
-static mi_arena_id_t mi_arena_id_create(size_t arena_index) {
- mi_assert_internal(arena_index < MI_MAX_ARENAS);
- return (int)arena_index + 1;
-}
-
-mi_arena_id_t _mi_arena_id_none(void) {
- return 0;
-}
-
-static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) {
- return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) ||
- (arena_id == req_arena_id));
-}
-
-bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
- if (memid.memkind == MI_MEM_ARENA) {
- return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
- }
- else {
- return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id);
- }
-}
-
-size_t mi_arena_get_count(void) {
- return mi_atomic_load_relaxed(&mi_arena_count);
-}
-
-mi_arena_t* mi_arena_from_index(size_t idx) {
- mi_assert_internal(idx < mi_arena_get_count());
- return mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[idx]);
-}
-
-
-
-/* -----------------------------------------------------------
- Util
------------------------------------------------------------ */
-
-// Blocks needed for a given byte size
-static size_t mi_block_count_of_size(size_t size) {
- return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
-}
-
-// Byte size of a number of blocks
-static size_t mi_size_of_blocks(size_t bcount) {
- return (bcount * MI_ARENA_BLOCK_SIZE);
-}
-
-// Size of an arena
-static size_t mi_arena_size(mi_arena_t* arena) {
- return mi_size_of_blocks(arena->block_count);
-}
-
-static size_t mi_arena_info_blocks(void) {
- const size_t os_page_size = _mi_os_page_size();
- const size_t info_size = _mi_align_up(sizeof(mi_arena_t), os_page_size) + os_page_size; // + guard page
- const size_t info_blocks = mi_block_count_of_size(info_size);
- return info_blocks;
-}
-
-
-// Start of the arena memory area
-static uint8_t* mi_arena_start(mi_arena_t* arena) {
- return ((uint8_t*)arena);
-}
-
-// Start of a block
-void* mi_arena_block_start(mi_arena_t* arena, size_t block_index) {
- return (mi_arena_start(arena) + mi_size_of_blocks(block_index));
-}
-
-// Arena area
-void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
- if (size != NULL) *size = 0;
- const size_t arena_index = mi_arena_id_index(arena_id);
- if (arena_index >= MI_MAX_ARENAS) return NULL;
- mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
- if (arena == NULL) return NULL;
- if (size != NULL) { *size = mi_size_of_blocks(arena->block_count); }
- return mi_arena_start(arena);
-}
-
-
-// Create an arena memid
-static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, size_t block_index) {
- mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
- memid.mem.arena.id = id;
- memid.mem.arena.block_index = block_index;
- memid.mem.arena.is_exclusive = is_exclusive;
- return memid;
-}
-
-// returns if the arena is exclusive
-bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, size_t* block_index) {
- mi_assert_internal(memid.memkind == MI_MEM_ARENA);
- *arena_index = mi_arena_id_index(memid.mem.arena.id);
- *block_index = memid.mem.arena.block_index;
- return memid.mem.arena.is_exclusive;
-}
-
-
-
-/* -----------------------------------------------------------
- Arena Allocation
------------------------------------------------------------ */
-
-static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
- bool commit, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld)
-{
- MI_UNUSED(arena_index);
- mi_assert_internal(mi_arena_id_index(arena->id) == arena_index);
-
- size_t block_index;
- if (!mi_bitmap_try_find_and_clearN(&arena->blocks_free, tseq, needed_bcount, &block_index)) return NULL;
-
- // claimed it!
- void* p = mi_arena_block_start(arena, block_index);
- *memid = mi_memid_create_arena(arena->id, arena->exclusive, block_index);
- memid->is_pinned = arena->memid.is_pinned;
-
- // set the dirty bits
- if (arena->memid.initially_zero) {
- memid->initially_zero = mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_dirty, block_index, needed_bcount, NULL);
- }
-
- // set commit state
- if (commit) {
- // commit requested, but the range may not be committed as a whole: ensure it is committed now
- memid->initially_committed = true;
-
- bool all_already_committed;
- mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount, &all_already_committed);
- if (!all_already_committed) {
- bool commit_zero = false;
- if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) {
- memid->initially_committed = false;
- }
- else {
- if (commit_zero) { memid->initially_zero = true; }
- }
- }
- }
- else {
- // no need to commit, but check if already fully committed
- memid->initially_committed = mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount);
- }
-
- return p;
-}
-
-// allocate in a speficic arena
-static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node,
- size_t size, size_t alignment,
- bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld)
-{
- mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN);
- if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL;
-
- const size_t bcount = mi_block_count_of_size(size);
- const size_t arena_index = mi_arena_id_index(arena_id);
- mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
- mi_assert_internal(size <= mi_size_of_blocks(bcount));
-
- // Check arena suitability
- mi_arena_t* arena = mi_arena_from_index(arena_index);
- if (arena == NULL) return NULL;
- if (!allow_large && arena->is_large) return NULL;
- if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL;
- if (req_arena_id == _mi_arena_id_none()) { // in not specific, check numa affinity
- const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
- if (match_numa_node) { if (!numa_suitable) return NULL; }
- else { if (numa_suitable) return NULL; }
- }
-
- // try to allocate
- void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, tseq, memid, tld);
- mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment));
- return p;
-}
-
-
-// allocate from an arena with fallback to the OS
-static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
- bool commit, bool allow_large,
- mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld)
-{
- mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN);
- if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL;
-
- const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
- if mi_likely(max_arena == 0) return NULL;
-
- if (req_arena_id != _mi_arena_id_none()) {
- // try a specific arena if requested
- if (mi_arena_id_index(req_arena_id) < max_arena) {
- void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
- if (p != NULL) return p;
- }
- }
- else {
- // try numa affine allocation
- for (size_t i = 0; i < max_arena; i++) {
- void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
- if (p != NULL) return p;
- }
-
- // try from another numa node instead..
- if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already
- for (size_t i = 0; i < max_arena; i++) {
- void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
- if (p != NULL) return p;
- }
- }
- }
- return NULL;
-}
-
-// try to reserve a fresh arena space
-static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id)
-{
- if (_mi_preloading()) return false; // use OS only while pre loading
- if (req_arena_id != _mi_arena_id_none()) return false;
-
- const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count);
- if (arena_count > (MI_MAX_ARENAS - 4)) return false;
-
- // calc reserve
- size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
- if (arena_reserve == 0) return false;
-
- if (!_mi_os_has_virtual_reserve()) {
- arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example)
- }
- arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
-
- if (arena_count >= 8 && arena_count <= 128) {
- // scale up the arena sizes exponentially every 8 entries (128 entries get to 589TiB)
- const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16);
- size_t reserve = 0;
- if (!mi_mul_overflow(multiplier, arena_reserve, &reserve)) {
- arena_reserve = reserve;
- }
- }
-
- // check arena bounds
- const size_t min_reserve = mi_size_of_blocks(mi_arena_info_blocks() + 1);
- const size_t max_reserve = MI_BITMAP_MAX_BITS * MI_ARENA_BLOCK_SIZE;
- if (arena_reserve < min_reserve) {
- arena_reserve = min_reserve;
- }
- else if (arena_reserve > max_reserve) {
- arena_reserve = max_reserve;
- }
-
- if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size
-
- // commit eagerly?
- bool arena_commit = false;
- if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
- else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
-
- return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0);
-}
-
-
-void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large,
- mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
-{
- mi_assert_internal(memid != NULL && tld != NULL);
- mi_assert_internal(size > 0);
- size_t tseq = _mi_thread_seq_id();
- *memid = _mi_memid_none();
-
- const int numa_node = _mi_os_numa_node(tld); // current numa node
-
- // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
- if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed?
- if (size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && alignment <= MI_ARENA_BLOCK_ALIGN && align_offset == 0) {
- void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
- if (p != NULL) return p;
-
- // otherwise, try to first eagerly reserve a new arena
- if (req_arena_id == _mi_arena_id_none()) {
- mi_arena_id_t arena_id = 0;
- if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
- // and try allocate in there
- mi_assert_internal(req_arena_id == _mi_arena_id_none());
- p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
- if (p != NULL) return p;
- }
- }
- }
- }
-
- // if we cannot use OS allocation, return NULL
- if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) {
- errno = ENOMEM;
- return NULL;
- }
-
- // finally, fall back to the OS
- if (align_offset > 0) {
- return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
- }
- else {
- return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
- }
-}
-
-void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
-{
- return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld);
-}
-
-
-/* -----------------------------------------------------------
- Arena free
------------------------------------------------------------ */
-static void mi_arena_schedule_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats);
-static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats);
-
-void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
- mi_assert_internal(size > 0 && stats != NULL);
- mi_assert_internal(committed_size <= size);
- if (p==NULL) return;
- if (size==0) return;
- const bool all_committed = (committed_size == size);
-
- // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
- mi_track_mem_undefined(p, size);
-
- if (mi_memkind_is_os(memid.memkind)) {
- // was a direct OS allocation, pass through
- if (!all_committed && committed_size > 0) {
- // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size)
- _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
- }
- _mi_os_free(p, size, memid, stats);
- }
- else if (memid.memkind == MI_MEM_ARENA) {
- // allocated in an arena
- size_t arena_idx;
- size_t block_idx;
- mi_arena_memid_indices(memid, &arena_idx, &block_idx);
- mi_assert_internal(arena_idx < MI_MAX_ARENAS);
- mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
- mi_assert_internal(arena != NULL);
- const size_t blocks = mi_block_count_of_size(size);
-
- // checks
- if (arena == NULL) {
- _mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
- return;
- }
- mi_assert_internal(block_idx < arena->block_count);
- mi_assert_internal(block_idx > mi_arena_info_blocks());
- if (block_idx <= mi_arena_info_blocks() || block_idx > arena->block_count) {
- _mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
- return;
- }
-
- // potentially decommit
- if (arena->memid.is_pinned || arena->memid.initially_committed) {
- mi_assert_internal(all_committed);
- }
- else {
- if (!all_committed) {
- // mark the entire range as no longer committed (so we recommit the full range when re-using)
- mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_committed, blocks, block_idx, NULL);
- mi_track_mem_noaccess(p, size);
- if (committed_size > 0) {
- // if partially committed, adjust the committed stats (is it will be recommitted when re-using)
- // in the delayed purge, we now need to not count a decommit if the range is not marked as committed.
- _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
- }
- // note: if not all committed, it may be that the purge will reset/decommit the entire range
- // that contains already decommitted parts. Since purge consistently uses reset or decommit that
- // works (as we should never reset decommitted parts).
- }
- // (delay) purge the entire range
- mi_arena_schedule_purge(arena, block_idx, blocks, stats);
- }
-
- // and make it available to others again
- bool all_inuse = mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_free, block_idx, blocks, NULL);
- if (!all_inuse) {
- _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size);
- return;
- };
- }
- else {
- // arena was none, external, or static; nothing to do
- mi_assert_internal(memid.memkind < MI_MEM_OS);
- }
-
- // purge expired decommits
- mi_arenas_try_purge(false, false, stats);
-}
-
-// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
-// for dynamic libraries that are unloaded and need to release all their allocated memory.
-static void mi_arenas_unsafe_destroy(void) {
- const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
- size_t new_max_arena = 0;
- for (size_t i = 0; i < max_arena; i++) {
- mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
- if (arena != NULL) {
- mi_lock_done(&arena->abandoned_visit_lock);
- if (mi_memkind_is_os(arena->memid.memkind)) {
- mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
- _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid, &_mi_stats_main);
- }
- }
- }
-
- // try to lower the max arena.
- size_t expected = max_arena;
- mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena);
-}
-
-// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
-void _mi_arenas_collect(bool force_purge, mi_stats_t* stats) {
- mi_arenas_try_purge(force_purge, force_purge /* visit all? */, stats);
-}
-
-// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
-// for dynamic libraries that are unloaded and need to release all their allocated memory.
-void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
- mi_arenas_unsafe_destroy();
- _mi_arenas_collect(true /* force purge */, stats); // purge non-owned arenas
-}
-
-// Is a pointer inside any of our arenas?
-bool _mi_arena_contains(const void* p) {
- const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
- for (size_t i = 0; i < max_arena; i++) {
- mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_blocks(arena->block_count) > (const uint8_t*)p) {
- return true;
- }
- }
- return false;
-}
-
-
-/* -----------------------------------------------------------
- Add an arena.
------------------------------------------------------------ */
-
-static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) {
- mi_assert_internal(arena != NULL);
- mi_assert_internal(arena->block_count > 0);
- if (arena_id != NULL) { *arena_id = -1; }
-
- size_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
- if (i >= MI_MAX_ARENAS) {
- mi_atomic_decrement_acq_rel(&mi_arena_count);
- return false;
- }
- _mi_stat_counter_increase(&stats->arena_count,1);
- arena->id = mi_arena_id_create(i);
- mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
- if (arena_id != NULL) { *arena_id = arena->id; }
- return true;
-}
-
-static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept
-{
- mi_assert(!is_large || memid.initially_committed && memid.is_pinned);
- mi_assert(_mi_is_aligned(start,MI_ARENA_BLOCK_SIZE));
- mi_assert(start!=NULL);
- if (start==NULL) return false;
- if (!_mi_is_aligned(start,MI_ARENA_BLOCK_SIZE)) {
- // todo: use alignment in memid to align to blocksize first?
- _mi_warning_message("cannot use OS memory since it is not aligned to %zu KiB (address %p)", MI_ARENA_BLOCK_SIZE/MI_KiB, start);
- return false;
- }
-
- if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); }
-
- const size_t info_blocks = mi_arena_info_blocks();
- const size_t bcount = size / MI_ARENA_BLOCK_SIZE; // divide down
- if (bcount < info_blocks+1) {
- _mi_warning_message("cannot use OS memory since it is not large enough (size %zu KiB, minimum required is %zu KiB)", size/MI_KiB, mi_size_of_blocks(info_blocks+1)/MI_KiB);
- return false;
- }
- if (bcount > MI_BITMAP_MAX_BITS) {
- // todo: allow larger areas (either by splitting it up in arena's or having larger arena's)
- _mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_blocks(MI_BITMAP_MAX_BITS)/MI_MiB);
- return false;
- }
- mi_arena_t* arena = (mi_arena_t*)start;
-
- // commit & zero if needed
- bool is_zero = memid.initially_zero;
- if (!memid.initially_committed) {
- _mi_os_commit(arena, mi_size_of_blocks(info_blocks), &is_zero, &_mi_stats_main);
- }
- if (!is_zero) {
- _mi_memzero(arena, mi_size_of_blocks(info_blocks));
- }
-
- // init
- arena->id = _mi_arena_id_none();
- arena->memid = memid;
- arena->exclusive = exclusive;
- arena->block_count = bcount;
- arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
- arena->is_large = is_large;
- arena->purge_expire = 0;
- mi_lock_init(&arena->abandoned_visit_lock);
-
- // init bitmaps
- mi_bitmap_init(&arena->blocks_free,true);
- mi_bitmap_init(&arena->blocks_committed,true);
- mi_bitmap_init(&arena->blocks_dirty,true);
- mi_bitmap_init(&arena->blocks_purge,true);
- for( int i = 0; i < MI_ARENA_BIN_COUNT; i++) {
- mi_bitmap_init(&arena->blocks_abandoned[i],true);
- }
-
- // reserve our meta info (and reserve blocks outside the memory area)
- mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->blocks_free, info_blocks /* start */, arena->block_count - info_blocks);
- if (memid.initially_committed) {
- mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->blocks_committed, 0, arena->block_count);
- }
- else {
- mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, 0, info_blocks, NULL);
- }
- mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_dirty, 0, info_blocks, NULL);
-
- return mi_arena_add(arena, arena_id, &_mi_stats_main);
-}
-
-
-bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
- mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL);
- memid.initially_committed = is_committed;
- memid.initially_zero = is_zero;
- memid.is_pinned = is_large;
- return mi_manage_os_memory_ex2(start, size, is_large, numa_node, exclusive, memid, arena_id);
-}
-
-// Reserve a range of regular OS memory
-int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
- if (arena_id != NULL) *arena_id = _mi_arena_id_none();
- size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
- mi_memid_t memid;
- void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main);
- if (start == NULL) return ENOMEM;
- const bool is_large = memid.is_pinned; // todo: use separate is_large field?
- if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) {
- _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main);
- _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024));
- return ENOMEM;
- }
- _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : "");
- return 0;
-}
-
-
-// Manage a range of regular OS memory
-bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept {
- return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL);
-}
-
-// Reserve a range of regular OS memory
-int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept {
- return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL);
-}
-
-
-/* -----------------------------------------------------------
- Debugging
------------------------------------------------------------ */
-static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf) {
- size_t bit_set_count = 0;
- for (int bit = 0; bit < MI_BFIELD_BITS; bit++) {
- bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0);
- if (is_set) bit_set_count++;
- buf[bit] = (is_set ? 'x' : '.');
- }
- return bit_set_count;
-}
-
-static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_t* bitmap) {
- _mi_verbose_message("%s%s:\n", prefix, header);
- size_t bit_count = 0;
- size_t bit_set_count = 0;
- for (int i = 0; i < MI_BFIELD_BITS && bit_count < block_count; i++) {
- char buf[MI_BITMAP_CHUNK_BITS + 1];
- mi_bitmap_chunk_t* chunk = &bitmap->chunks[i];
- for (int j = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) {
- if (bit_count < block_count) {
- bit_set_count += mi_debug_show_bfield(chunk->bfields[j], buf + j*MI_BFIELD_BITS);
- }
- else {
- _mi_memset(buf + j*MI_BFIELD_BITS, ' ', MI_BFIELD_BITS);
- }
- bit_count += MI_BFIELD_BITS;
- }
- buf[MI_BITMAP_CHUNK_BITS] = 0;
- _mi_verbose_message("%s %s\n", prefix, buf);
- }
- _mi_verbose_message("%s total ('x'): %zu\n", prefix, bit_set_count);
- return bit_set_count;
-}
-
-void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept {
- MI_UNUSED(show_abandoned);
- size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count);
- size_t free_total = 0;
- size_t block_total = 0;
- //size_t abandoned_total = 0;
- size_t purge_total = 0;
- for (size_t i = 0; i < max_arenas; i++) {
- mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena == NULL) break;
- block_total += arena->block_count;
- _mi_verbose_message("arena %zu: %zu blocks%s\n", i, arena->block_count, (arena->memid.is_pinned ? ", pinned" : ""));
- if (show_inuse) {
- free_total += mi_debug_show_bitmap(" ", "free blocks", arena->block_count, &arena->blocks_free);
- }
- mi_debug_show_bitmap(" ", "committed blocks", arena->block_count, &arena->blocks_committed);
- // todo: abandoned blocks
- if (show_purge) {
- purge_total += mi_debug_show_bitmap(" ", "purgeable blocks", arena->block_count, &arena->blocks_purge);
- }
- }
- if (show_inuse) _mi_verbose_message("total inuse blocks : %zu\n", block_total - free_total);
- // if (show_abandoned) _mi_verbose_message("total abandoned blocks: %zu\n", abandoned_total);
- if (show_purge) _mi_verbose_message("total purgeable blocks: %zu\n", purge_total);
-}
-
-
-/* -----------------------------------------------------------
- Reserve a huge page arena.
------------------------------------------------------------ */
-// reserve at a specific numa node
-int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
- if (arena_id != NULL) *arena_id = -1;
- if (pages==0) return 0;
- if (numa_node < -1) numa_node = -1;
- if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
- size_t hsize = 0;
- size_t pages_reserved = 0;
- mi_memid_t memid;
- void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid);
- if (p==NULL || pages_reserved==0) {
- _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
- return ENOMEM;
- }
- _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);
-
- if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) {
- _mi_os_free(p, hsize, memid, &_mi_stats_main);
- return ENOMEM;
- }
- return 0;
-}
-
-int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
- return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL);
-}
-
-// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)
-int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept {
- if (pages == 0) return 0;
-
- // pages per numa node
- size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
- if (numa_count <= 0) numa_count = 1;
- const size_t pages_per = pages / numa_count;
- const size_t pages_mod = pages % numa_count;
- const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
-
- // reserve evenly among numa nodes
- for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
- size_t node_pages = pages_per; // can be 0
- if (numa_node < pages_mod) node_pages++;
- int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
- if (err) return err;
- if (pages < node_pages) {
- pages = 0;
- }
- else {
- pages -= node_pages;
- }
- }
-
- return 0;
-}
-
-int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
- MI_UNUSED(max_secs);
- _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
- if (pages_reserved != NULL) *pages_reserved = 0;
- int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
- if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
- return err;
-}
-
-
-
-/* -----------------------------------------------------------
- Arena purge
------------------------------------------------------------ */
-
-static long mi_arena_purge_delay(void) {
- // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay
- return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult));
-}
-
-// reset or decommit in an arena and update the committed/decommit bitmaps
-// assumes we own the area (i.e. blocks_free is claimed by us)
-static void mi_arena_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats) {
- mi_assert_internal(!arena->memid.is_pinned);
- const size_t size = mi_size_of_blocks(blocks);
- void* const p = mi_arena_block_start(arena, block_idx);
- bool needs_recommit;
- if (mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_idx, blocks)) {
- // all blocks are committed, we can purge freely
- needs_recommit = _mi_os_purge(p, size, stats);
- }
- else {
- // some blocks are not committed -- this can happen when a partially committed block is freed
- // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
- // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
- // and also undo the decommit stats (as it was already adjusted)
- mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
- needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats);
- if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); }
- }
-
- // clear the purged blocks
- mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_purge, blocks, block_idx, NULL);
-
- // update committed bitmap
- if (needs_recommit) {
- mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_committed, blocks, block_idx, NULL);
- }
-}
-
-
-// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls.
-// Note: assumes we (still) own the area as we may purge immediately
-static void mi_arena_schedule_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats) {
- const long delay = mi_arena_purge_delay();
- if (delay < 0) return; // is purging allowed at all?
-
- if (_mi_preloading() || delay == 0) {
- // decommit directly
- mi_arena_purge(arena, block_idx, blocks, stats);
- }
- else {
- // schedule decommit
- _mi_error_message(EFAULT, "purging not yet implemented\n");
- }
-}
-
-
-static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats) {
- if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled
-
- const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count);
- if (max_arena == 0) return;
-
- _mi_error_message(EFAULT, "purging not yet implemented\n");
- MI_UNUSED(stats);
- MI_UNUSED(visit_all);
- MI_UNUSED(force);
-}
-
-
-#if 0
-
#define MI_IN_ARENA_C
#include "arena-abandon.c"
#undef MI_IN_ARENA_C
@@ -904,12 +116,12 @@ static size_t mi_block_count_of_size(size_t size) {
return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}
-static size_t mi_size_of_blocks(size_t bcount) {
+static size_t mi_arena_block_size(size_t bcount) {
return (bcount * MI_ARENA_BLOCK_SIZE);
}
static size_t mi_arena_size(mi_arena_t* arena) {
- return mi_size_of_blocks(arena->block_count);
+ return mi_arena_block_size(arena->block_count);
}
static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) {
@@ -995,7 +207,7 @@ void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) {
}
void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) {
- return (arena->start + mi_size_of_blocks(mi_bitmap_index_bit(bindex)));
+ return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex)));
}
@@ -1004,7 +216,7 @@ void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) {
----------------------------------------------------------- */
// claim the `blocks_inuse` bits
-static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, size_t block_idx, mi_stats_t* stats)
+static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
{
size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter
if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx, stats)) {
@@ -1056,7 +268,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
_mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
if (any_uncommitted) {
bool commit_zero = false;
- if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) {
+ if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) {
memid->initially_committed = false;
}
else {
@@ -1081,7 +293,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no
const size_t bcount = mi_block_count_of_size(size);
const size_t arena_index = mi_arena_id_index(arena_id);
mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
- mi_assert_internal(size <= mi_size_of_blocks(bcount));
+ mi_assert_internal(size <= mi_arena_block_size(bcount));
// Check arena suitability
mi_arena_t* arena = mi_arena_from_index(arena_index);
@@ -1227,7 +439,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
if (arena_index >= MI_MAX_ARENAS) return NULL;
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
if (arena == NULL) return NULL;
- if (size != NULL) { *size = mi_size_of_blocks(arena->block_count); }
+ if (size != NULL) { *size = mi_arena_block_size(arena->block_count); }
return arena->start;
}
@@ -1247,7 +459,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks,
mi_assert_internal(arena->blocks_committed != NULL);
mi_assert_internal(arena->blocks_purge != NULL);
mi_assert_internal(!arena->memid.is_pinned);
- const size_t size = mi_size_of_blocks(blocks);
+ const size_t size = mi_arena_block_size(blocks);
void* const p = mi_arena_block_start(arena, bitmap_idx);
bool needs_recommit;
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
@@ -1299,25 +511,25 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
// purge a range of blocks
// return true if the full range was purged.
// assumes we own the area (i.e. blocks_in_use is claimed by us)
-static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startseqx, size_t bitlen, size_t purge, mi_stats_t* stats) {
- const size_t endidx = startseqx + bitlen;
- size_t bitseqx = startseqx;
+static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) {
+ const size_t endidx = startidx + bitlen;
+ size_t bitidx = startidx;
bool all_purged = false;
- while (bitseqx < endidx) {
+ while (bitidx < endidx) {
// count consecutive ones in the purge mask
size_t count = 0;
- while (bitseqx + count < endidx && (purge & ((size_t)1 << (bitseqx + count))) != 0) {
+ while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) {
count++;
}
if (count > 0) {
// found range to be purged
- const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitseqx);
+ const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx);
mi_arena_purge(arena, range_idx, count, stats);
if (count == bitlen) {
all_purged = true;
}
}
- bitseqx += (count+1); // +1 to skip the zero bit (or end)
+ bitidx += (count+1); // +1 to skip the zero bit (or end)
}
return all_purged;
}
@@ -1339,16 +551,16 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi
for (size_t i = 0; i < arena->field_count; i++) {
size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]);
if (purge != 0) {
- size_t bitseqx = 0;
- while (bitseqx < MI_BITMAP_FIELD_BITS) {
+ size_t bitidx = 0;
+ while (bitidx < MI_BITMAP_FIELD_BITS) {
// find consecutive range of ones in the purge mask
size_t bitlen = 0;
- while (bitseqx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitseqx + bitlen))) != 0) {
+ while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) {
bitlen++;
}
// temporarily claim the purge range as "in-use" to be thread-safe with allocation
// try to claim the longest range of corresponding in_use bits
- const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitseqx);
+ const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx);
while( bitlen > 0 ) {
if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) {
break;
@@ -1359,15 +571,15 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi
if (bitlen > 0) {
// read purge again now that we have the in_use bits
purge = mi_atomic_load_acquire(&arena->blocks_purge[i]);
- if (!mi_arena_purge_range(arena, i, bitseqx, bitlen, purge, stats)) {
+ if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) {
full_purge = false;
}
any_purged = true;
// release the claimed `in_use` bits again
_mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index);
}
- bitseqx += (bitlen+1); // +1 to skip the zero (or end)
- } // while bitseqx
+ bitidx += (bitlen+1); // +1 to skip the zero (or end)
+ } // while bitidx
} // purge != 0
}
// if not fully purged, make sure to purge again in the future
@@ -1530,7 +742,7 @@ bool _mi_arena_contains(const void* p) {
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_size_of_blocks(arena->block_count) > (const uint8_t*)p) {
+ if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
return true;
}
}
@@ -1606,8 +818,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
mi_assert_internal(post >= 0);
if (post > 0) {
// don't use leftover bits at the end
- mi_bitmap_index_t postseqx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
- _mi_bitmap_claim(arena->blocks_inuse, fields, post, postseqx, NULL);
+ mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
+ _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
}
return mi_arena_add(arena, arena_id, &_mi_stats_main);
@@ -1774,4 +986,3 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
}
-#endif
\ No newline at end of file
diff --git a/src/arena.c b/src/arena.c
index 8ca5aaf3..28ad61f1 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -21,7 +21,6 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
#include "mimalloc.h"
#include "mimalloc/internal.h"
-#include "mimalloc/atomic.h"
#include "bitmap.h"
@@ -29,38 +28,823 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
Arena allocation
----------------------------------------------------------- */
+#define MI_ARENA_BIN_COUNT (MI_BIN_COUNT)
+
+
// A memory arena descriptor
typedef struct mi_arena_s {
- mi_arena_id_t id; // arena id; 0 for non-specific
mi_memid_t memid; // memid of the memory area
- _Atomic(uint8_t*)start; // the start of the memory area
+ mi_arena_id_t id; // arena id; 0 for non-specific
+
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
- size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
- size_t meta_size; // size of the arena structure itself (including its bitmaps)
- mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
int numa_node; // associated NUMA node
bool exclusive; // only allow allocations if specifically for this arena
bool is_large; // memory area consists of large- or huge OS pages (always committed)
mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited
- _Atomic(size_t)search_idx; // optimization to start the search for free blocks
- _Atomic(mi_msecs_t)purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
- mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
- mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
- mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
- mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
- mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
- // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields.
+ _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
+
+ mi_bitmap_t blocks_free; // is the block free?
+ mi_bitmap_t blocks_committed; // is the block committed? (i.e. accessible)
+ mi_bitmap_t blocks_purge; // can the block be purged? (block in purge => block in free)
+ mi_bitmap_t blocks_dirty; // is the block potentially non-zero?
+ mi_bitmap_t blocks_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page)
+ // the full queue contains abandoned full pages
} mi_arena_t;
-
-#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN)
-#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB
-#define MI_MAX_ARENAS (132) // Limited as the reservation exponentially increases (and takes up .bss)
+#define MI_MAX_ARENAS (1024) // Limited for now (and takes up .bss)
// The available arenas
static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0
+
+/* -----------------------------------------------------------
+ Arena id's
+ id = arena_index + 1
+----------------------------------------------------------- */
+
+size_t mi_arena_id_index(mi_arena_id_t id) {
+ return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1);
+}
+
+static mi_arena_id_t mi_arena_id_create(size_t arena_index) {
+ mi_assert_internal(arena_index < MI_MAX_ARENAS);
+ return (int)arena_index + 1;
+}
+
+mi_arena_id_t _mi_arena_id_none(void) {
+ return 0;
+}
+
+static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) {
+ return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) ||
+ (arena_id == req_arena_id));
+}
+
+bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
+ if (memid.memkind == MI_MEM_ARENA) {
+ return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
+ }
+ else {
+ return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id);
+ }
+}
+
+size_t mi_arena_get_count(void) {
+ return mi_atomic_load_relaxed(&mi_arena_count);
+}
+
+mi_arena_t* mi_arena_from_index(size_t idx) {
+ mi_assert_internal(idx < mi_arena_get_count());
+ return mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[idx]);
+}
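+// Worked example of the mapping (values are illustrative): mi_arena_id_create(0) == 1 and
+// mi_arena_id_index(1) == 0, while mi_arena_id_index(_mi_arena_id_none()) == MI_MAX_ARENAS,
+// an always-out-of-range index that never matches a valid arena slot.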
+
+
+
+/* -----------------------------------------------------------
+ Util
+----------------------------------------------------------- */
+
+
+// Size of an arena
+static size_t mi_arena_size(mi_arena_t* arena) {
+ return mi_size_of_blocks(arena->block_count);
+}
+
+static size_t mi_arena_info_blocks(void) {
+ const size_t os_page_size = _mi_os_page_size();
+ const size_t info_size = _mi_align_up(sizeof(mi_arena_t), os_page_size) + os_page_size; // + guard page
+ const size_t info_blocks = mi_block_count_of_size(info_size);
+ return info_blocks;
+}
+
+
+// Start of the arena memory area
+static uint8_t* mi_arena_start(mi_arena_t* arena) {
+ return ((uint8_t*)arena);
+}
+
+// Start of a block
+void* mi_arena_block_start(mi_arena_t* arena, size_t block_index) {
+ return (mi_arena_start(arena) + mi_size_of_blocks(block_index));
+}
+
+// Arena area
+void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
+ if (size != NULL) *size = 0;
+ const size_t arena_index = mi_arena_id_index(arena_id);
+ if (arena_index >= MI_MAX_ARENAS) return NULL;
+ mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
+ if (arena == NULL) return NULL;
+ if (size != NULL) { *size = mi_size_of_blocks(arena->block_count); }
+ return mi_arena_start(arena);
+}
+
+
+// Create an arena memid
+static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, size_t block_index) {
+ mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
+ memid.mem.arena.id = id;
+ memid.mem.arena.block_index = block_index;
+ memid.mem.arena.is_exclusive = is_exclusive;
+ return memid;
+}
+
+// returns if the arena is exclusive
+bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, size_t* block_index) {
+ mi_assert_internal(memid.memkind == MI_MEM_ARENA);
+ *arena_index = mi_arena_id_index(memid.mem.arena.id);
+ *block_index = memid.mem.arena.block_index;
+ return memid.mem.arena.is_exclusive;
+}
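+// Round-trip sketch (illustrative only; block index 42 is an arbitrary value):
+//   mi_memid_t memid = mi_memid_create_arena(arena->id, arena->exclusive, 42);
+//   size_t arena_index, block_index;
+//   bool is_exclusive = mi_arena_memid_indices(memid, &arena_index, &block_index);
+//   // arena_index == mi_arena_id_index(arena->id) and block_index == 42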
+
+
+
+/* -----------------------------------------------------------
+ Arena Allocation
+----------------------------------------------------------- */
+
+static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
+ bool commit, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld)
+{
+ MI_UNUSED(arena_index);
+ mi_assert_internal(mi_arena_id_index(arena->id) == arena_index);
+
+ size_t block_index;
+ if (!mi_bitmap_try_find_and_clearN(&arena->blocks_free, tseq, needed_bcount, &block_index)) return NULL;
+
+ // claimed it!
+ void* p = mi_arena_block_start(arena, block_index);
+ *memid = mi_memid_create_arena(arena->id, arena->exclusive, block_index);
+ memid->is_pinned = arena->memid.is_pinned;
+
+ // set the dirty bits
+ if (arena->memid.initially_zero) {
+ memid->initially_zero = mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_dirty, block_index, needed_bcount, NULL);
+ }
+
+ // set commit state
+ if (commit) {
+ // commit requested, but the range may not be committed as a whole: ensure it is committed now
+ memid->initially_committed = true;
+
+ bool all_already_committed;
+ mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount, &all_already_committed);
+ if (!all_already_committed) {
+ bool commit_zero = false;
+ if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) {
+ memid->initially_committed = false;
+ }
+ else {
+ if (commit_zero) { memid->initially_zero = true; }
+ }
+ }
+ }
+ else {
+ // no need to commit, but check if already fully committed
+ memid->initially_committed = mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_index, needed_bcount);
+ }
+
+ return p;
+}
+
+// allocate in a specific arena
+static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node,
+ size_t size, size_t alignment,
+ bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld)
+{
+ mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN);
+ if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL;
+
+ const size_t bcount = mi_block_count_of_size(size);
+ const size_t arena_index = mi_arena_id_index(arena_id);
+ mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
+ mi_assert_internal(size <= mi_size_of_blocks(bcount));
+
+ // Check arena suitability
+ mi_arena_t* arena = mi_arena_from_index(arena_index);
+ if (arena == NULL) return NULL;
+ if (!allow_large && arena->is_large) return NULL;
+ if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL;
+  if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity
+ const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
+ if (match_numa_node) { if (!numa_suitable) return NULL; }
+ else { if (numa_suitable) return NULL; }
+ }
+
+ // try to allocate
+ void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, tseq, memid, tld);
+ mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment));
+ return p;
+}
+
+
+// allocate from an arena with fallback to the OS
+static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
+ bool commit, bool allow_large,
+ mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid, mi_os_tld_t* tld)
+{
+ mi_assert(alignment <= MI_ARENA_BLOCK_ALIGN);
+ if (alignment > MI_ARENA_BLOCK_ALIGN) return NULL;
+
+ const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+ if mi_likely(max_arena == 0) return NULL;
+
+ if (req_arena_id != _mi_arena_id_none()) {
+ // try a specific arena if requested
+ if (mi_arena_id_index(req_arena_id) < max_arena) {
+ void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
+ if (p != NULL) return p;
+ }
+ }
+ else {
+ // try numa affine allocation
+ for (size_t i = 0; i < max_arena; i++) {
+ void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
+ if (p != NULL) return p;
+ }
+
+ // try from another numa node instead..
+    if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arenas have been tried already
+ for (size_t i = 0; i < max_arena; i++) {
+ void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
+ if (p != NULL) return p;
+ }
+ }
+ }
+ return NULL;
+}
+
+// try to reserve a fresh arena space
+static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id)
+{
+  if (_mi_preloading()) return false;  // use the OS only while preloading
+ if (req_arena_id != _mi_arena_id_none()) return false;
+
+ const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count);
+ if (arena_count > (MI_MAX_ARENAS - 4)) return false;
+
+ // calc reserve
+ size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
+ if (arena_reserve == 0) return false;
+
+ if (!_mi_os_has_virtual_reserve()) {
+ arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example)
+ }
+ arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
+
+ if (arena_count >= 8 && arena_count <= 128) {
+ // scale up the arena sizes exponentially every 8 entries (128 entries get to 589TiB)
+ const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16);
+ size_t reserve = 0;
+ if (!mi_mul_overflow(multiplier, arena_reserve, &reserve)) {
+ arena_reserve = reserve;
+ }
+ }
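+  // illustration of the resulting multiplier (pure arithmetic): arena_count 8..15 gives x2,
+  // 16..23 gives x4, doubling every 8 arenas up to x65536 (2^16) at 128.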
+
+ // check arena bounds
+ const size_t min_reserve = mi_size_of_blocks(mi_arena_info_blocks() + 1);
+ const size_t max_reserve = MI_BITMAP_MAX_BITS * MI_ARENA_BLOCK_SIZE;
+ if (arena_reserve < min_reserve) {
+ arena_reserve = min_reserve;
+ }
+ else if (arena_reserve > max_reserve) {
+ arena_reserve = max_reserve;
+ }
+
+ if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size
+
+ // commit eagerly?
+ bool arena_commit = false;
+ if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
+ else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
+
+ return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0);
+}
+
+
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large,
+ mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
+{
+ mi_assert_internal(memid != NULL && tld != NULL);
+ mi_assert_internal(size > 0);
+ size_t tseq = _mi_thread_seq_id();
+ *memid = _mi_memid_none();
+
+ const int numa_node = _mi_os_numa_node(tld); // current numa node
+
+ // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
+ if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed?
+ if (size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && alignment <= MI_ARENA_BLOCK_ALIGN && align_offset == 0) {
+ void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
+ if (p != NULL) return p;
+
+ // otherwise, try to first eagerly reserve a new arena
+ if (req_arena_id == _mi_arena_id_none()) {
+ mi_arena_id_t arena_id = 0;
+ if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
+ // and try allocate in there
+ mi_assert_internal(req_arena_id == _mi_arena_id_none());
+ p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, tseq, memid, tld);
+ if (p != NULL) return p;
+ }
+ }
+ }
+ }
+
+ // if we cannot use OS allocation, return NULL
+ if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ // finally, fall back to the OS
+ if (align_offset > 0) {
+ return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
+ }
+ else {
+ return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
+ }
+}
+
+void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
+{
+ return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld);
+}
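+// Usage sketch (illustrative; `tld` is the caller's os tld and the size is assumed to fall in
+// the arena object range so the request is served from an arena rather than the OS):
+//   mi_memid_t memid;
+//   void* p = _mi_arena_alloc(MI_ARENA_BLOCK_SIZE, true /*commit*/, false /*allow_large*/,
+//                             _mi_arena_id_none(), &memid, tld);
+//   // ... use p ..., and later (assuming the range stayed fully committed):
+//   _mi_arena_free(p, MI_ARENA_BLOCK_SIZE, MI_ARENA_BLOCK_SIZE, memid, tld->stats);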
+
+
+/* -----------------------------------------------------------
+ Arena free
+----------------------------------------------------------- */
+static void mi_arena_schedule_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats);
+static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats);
+
+void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
+ mi_assert_internal(size > 0 && stats != NULL);
+ mi_assert_internal(committed_size <= size);
+ if (p==NULL) return;
+ if (size==0) return;
+ const bool all_committed = (committed_size == size);
+
+ // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
+ mi_track_mem_undefined(p, size);
+
+ if (mi_memkind_is_os(memid.memkind)) {
+ // was a direct OS allocation, pass through
+ if (!all_committed && committed_size > 0) {
+ // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size)
+ _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
+ }
+ _mi_os_free(p, size, memid, stats);
+ }
+ else if (memid.memkind == MI_MEM_ARENA) {
+ // allocated in an arena
+ size_t arena_idx;
+ size_t block_idx;
+ mi_arena_memid_indices(memid, &arena_idx, &block_idx);
+ mi_assert_internal(arena_idx < MI_MAX_ARENAS);
+ mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
+ mi_assert_internal(arena != NULL);
+ const size_t blocks = mi_block_count_of_size(size);
+
+ // checks
+ if (arena == NULL) {
+      _mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid kind: %d\n", p, size, (int)memid.memkind);
+ return;
+ }
+ mi_assert_internal(block_idx < arena->block_count);
+ mi_assert_internal(block_idx > mi_arena_info_blocks());
+    if (block_idx <= mi_arena_info_blocks() || block_idx >= arena->block_count) {
+      _mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid kind: %d\n", p, size, (int)memid.memkind);
+ return;
+ }
+
+ // potentially decommit
+ if (arena->memid.is_pinned || arena->memid.initially_committed) {
+ mi_assert_internal(all_committed);
+ }
+ else {
+ if (!all_committed) {
+ // mark the entire range as no longer committed (so we recommit the full range when re-using)
+ mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_committed, blocks, block_idx, NULL);
+ mi_track_mem_noaccess(p, size);
+ if (committed_size > 0) {
+          // if partially committed, adjust the committed stats (as the range will be recommitted when re-used)
+          // the delayed purge must then not count a decommit for a range that is no longer marked as committed.
+ _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
+ }
+ // note: if not all committed, it may be that the purge will reset/decommit the entire range
+ // that contains already decommitted parts. Since purge consistently uses reset or decommit that
+ // works (as we should never reset decommitted parts).
+ }
+ // (delay) purge the entire range
+ mi_arena_schedule_purge(arena, block_idx, blocks, stats);
+ }
+
+ // and make it available to others again
+ bool all_inuse = mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_free, block_idx, blocks, NULL);
+ if (!all_inuse) {
+ _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size);
+ return;
+    }
+ }
+ else {
+ // arena was none, external, or static; nothing to do
+ mi_assert_internal(memid.memkind < MI_MEM_OS);
+ }
+
+ // purge expired decommits
+ mi_arenas_try_purge(false, false, stats);
+}
+
+// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
+// for dynamic libraries that are unloaded and need to release all their allocated memory.
+static void mi_arenas_unsafe_destroy(void) {
+ const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+ size_t new_max_arena = 0;
+ for (size_t i = 0; i < max_arena; i++) {
+ mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
+ if (arena != NULL) {
+ mi_lock_done(&arena->abandoned_visit_lock);
+ if (mi_memkind_is_os(arena->memid.memkind)) {
+ mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
+ _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid, &_mi_stats_main);
+ }
+ }
+ }
+
+ // try to lower the max arena.
+ size_t expected = max_arena;
+ mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena);
+}
+
+// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
+void _mi_arenas_collect(bool force_purge, mi_stats_t* stats) {
+ mi_arenas_try_purge(force_purge, force_purge /* visit all? */, stats);
+}
+
+// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
+// for dynamic libraries that are unloaded and need to release all their allocated memory.
+void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
+ mi_arenas_unsafe_destroy();
+ _mi_arenas_collect(true /* force purge */, stats); // purge non-owned arenas
+}
+
+// Is a pointer inside any of our arenas?
+bool _mi_arena_contains(const void* p) {
+ const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+ for (size_t i = 0; i < max_arena; i++) {
+ mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+ if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_blocks(arena->block_count) > (const uint8_t*)p) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/* -----------------------------------------------------------
+ Add an arena.
+----------------------------------------------------------- */
+
+static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) {
+ mi_assert_internal(arena != NULL);
+ mi_assert_internal(arena->block_count > 0);
+ if (arena_id != NULL) { *arena_id = -1; }
+
+ size_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
+ if (i >= MI_MAX_ARENAS) {
+ mi_atomic_decrement_acq_rel(&mi_arena_count);
+ return false;
+ }
+ _mi_stat_counter_increase(&stats->arena_count,1);
+ arena->id = mi_arena_id_create(i);
+ mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
+ if (arena_id != NULL) { *arena_id = arena->id; }
+ return true;
+}
+
+static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept
+{
+  mi_assert(!is_large || (memid.initially_committed && memid.is_pinned));
+ mi_assert(_mi_is_aligned(start,MI_ARENA_BLOCK_SIZE));
+ mi_assert(start!=NULL);
+ if (start==NULL) return false;
+ if (!_mi_is_aligned(start,MI_ARENA_BLOCK_SIZE)) {
+ // todo: use alignment in memid to align to blocksize first?
+ _mi_warning_message("cannot use OS memory since it is not aligned to %zu KiB (address %p)", MI_ARENA_BLOCK_SIZE/MI_KiB, start);
+ return false;
+ }
+
+ if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); }
+
+ const size_t info_blocks = mi_arena_info_blocks();
+ const size_t bcount = size / MI_ARENA_BLOCK_SIZE; // divide down
+ if (bcount < info_blocks+1) {
+ _mi_warning_message("cannot use OS memory since it is not large enough (size %zu KiB, minimum required is %zu KiB)", size/MI_KiB, mi_size_of_blocks(info_blocks+1)/MI_KiB);
+ return false;
+ }
+ if (bcount > MI_BITMAP_MAX_BITS) {
+    // todo: allow larger areas (either by splitting them up into multiple arenas or by having larger arenas)
+ _mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_blocks(MI_BITMAP_MAX_BITS)/MI_MiB);
+ return false;
+ }
+ mi_arena_t* arena = (mi_arena_t*)start;
+
+ // commit & zero if needed
+ bool is_zero = memid.initially_zero;
+ if (!memid.initially_committed) {
+ _mi_os_commit(arena, mi_size_of_blocks(info_blocks), &is_zero, &_mi_stats_main);
+ }
+ if (!is_zero) {
+ _mi_memzero(arena, mi_size_of_blocks(info_blocks));
+ }
+
+ // init
+ arena->id = _mi_arena_id_none();
+ arena->memid = memid;
+ arena->exclusive = exclusive;
+ arena->block_count = bcount;
+ arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
+ arena->is_large = is_large;
+ arena->purge_expire = 0;
+ mi_lock_init(&arena->abandoned_visit_lock);
+
+ // init bitmaps
+ mi_bitmap_init(&arena->blocks_free,true);
+ mi_bitmap_init(&arena->blocks_committed,true);
+ mi_bitmap_init(&arena->blocks_dirty,true);
+ mi_bitmap_init(&arena->blocks_purge,true);
+ for( int i = 0; i < MI_ARENA_BIN_COUNT; i++) {
+ mi_bitmap_init(&arena->blocks_abandoned[i],true);
+ }
+
+ // reserve our meta info (and reserve blocks outside the memory area)
+ mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->blocks_free, info_blocks /* start */, arena->block_count - info_blocks);
+ if (memid.initially_committed) {
+ mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->blocks_committed, 0, arena->block_count);
+ }
+ else {
+ mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_committed, 0, info_blocks, NULL);
+ }
+ mi_bitmap_xsetN(MI_BIT_SET, &arena->blocks_dirty, 0, info_blocks, NULL);
+
+ return mi_arena_add(arena, arena_id, &_mi_stats_main);
+}
+
+
+bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
+ mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL);
+ memid.initially_committed = is_committed;
+ memid.initially_zero = is_zero;
+ memid.is_pinned = is_large;
+ return mi_manage_os_memory_ex2(start, size, is_large, numa_node, exclusive, memid, arena_id);
+}
+
+// Reserve a range of regular OS memory
+int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
+ if (arena_id != NULL) *arena_id = _mi_arena_id_none();
+ size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
+ mi_memid_t memid;
+ void* start = _mi_os_alloc_aligned(size, MI_ARENA_BLOCK_ALIGN, commit, allow_large, &memid, &_mi_stats_main);
+ if (start == NULL) return ENOMEM;
+ const bool is_large = memid.is_pinned; // todo: use separate is_large field?
+ if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) {
+ _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main);
+ _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024));
+ return ENOMEM;
+ }
+ _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : "");
+ return 0;
+}
+
+
+// Manage a range of regular OS memory
+bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept {
+ return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL);
+}
+
+// Reserve a range of regular OS memory
+int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept {
+ return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL);
+}
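+// Example sketch (assuming the exported mi_heap_new_in_arena API): reserve a dedicated,
+// exclusive arena up front and serve a heap from it.
+//   mi_arena_id_t arena_id;
+//   if (mi_reserve_os_memory_ex(256 * MI_MiB, false /*commit*/, true /*allow_large*/,
+//                               true /*exclusive*/, &arena_id) == 0) {
+//     mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
+//     void* p = mi_heap_malloc(heap, 1024);
+//     mi_free(p);
+//   }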
+
+
+/* -----------------------------------------------------------
+ Debugging
+----------------------------------------------------------- */
+static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf) {
+ size_t bit_set_count = 0;
+ for (int bit = 0; bit < MI_BFIELD_BITS; bit++) {
+ bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0);
+ if (is_set) bit_set_count++;
+ buf[bit] = (is_set ? 'x' : '.');
+ }
+ return bit_set_count;
+}
+
+static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_t* bitmap) {
+ _mi_verbose_message("%s%s:\n", prefix, header);
+ size_t bit_count = 0;
+ size_t bit_set_count = 0;
+ for (int i = 0; i < MI_BFIELD_BITS && bit_count < block_count; i++) {
+ char buf[MI_BITMAP_CHUNK_BITS + 1];
+ mi_bitmap_chunk_t* chunk = &bitmap->chunks[i];
+ for (int j = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) {
+ if (bit_count < block_count) {
+ bit_set_count += mi_debug_show_bfield(chunk->bfields[j], buf + j*MI_BFIELD_BITS);
+ }
+ else {
+ _mi_memset(buf + j*MI_BFIELD_BITS, ' ', MI_BFIELD_BITS);
+ }
+ bit_count += MI_BFIELD_BITS;
+ }
+ buf[MI_BITMAP_CHUNK_BITS] = 0;
+ _mi_verbose_message("%s %s\n", prefix, buf);
+ }
+ _mi_verbose_message("%s total ('x'): %zu\n", prefix, bit_set_count);
+ return bit_set_count;
+}
+
+void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept {
+ MI_UNUSED(show_abandoned);
+ size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count);
+ size_t free_total = 0;
+ size_t block_total = 0;
+ //size_t abandoned_total = 0;
+ size_t purge_total = 0;
+ for (size_t i = 0; i < max_arenas; i++) {
+ mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+ if (arena == NULL) break;
+ block_total += arena->block_count;
+ _mi_verbose_message("arena %zu: %zu blocks%s\n", i, arena->block_count, (arena->memid.is_pinned ? ", pinned" : ""));
+ if (show_inuse) {
+ free_total += mi_debug_show_bitmap(" ", "free blocks", arena->block_count, &arena->blocks_free);
+ }
+ mi_debug_show_bitmap(" ", "committed blocks", arena->block_count, &arena->blocks_committed);
+ // todo: abandoned blocks
+ if (show_purge) {
+ purge_total += mi_debug_show_bitmap(" ", "purgeable blocks", arena->block_count, &arena->blocks_purge);
+ }
+ }
+ if (show_inuse) _mi_verbose_message("total inuse blocks : %zu\n", block_total - free_total);
+ // if (show_abandoned) _mi_verbose_message("total abandoned blocks: %zu\n", abandoned_total);
+ if (show_purge) _mi_verbose_message("total purgeable blocks: %zu\n", purge_total);
+}
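+// Usage sketch: the listing above is only printed when verbose messages are enabled, e.g.
+//   mi_option_set(mi_option_verbose, 1);   // or set MIMALLOC_VERBOSE=1 in the environment
+//   mi_debug_show_arenas(true /*show_inuse*/, false /*show_abandoned*/, true /*show_purge*/);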
+
+
+/* -----------------------------------------------------------
+ Reserve a huge page arena.
+----------------------------------------------------------- */
+// reserve at a specific numa node
+int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
+ if (arena_id != NULL) *arena_id = -1;
+ if (pages==0) return 0;
+ if (numa_node < -1) numa_node = -1;
+ if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
+ size_t hsize = 0;
+ size_t pages_reserved = 0;
+ mi_memid_t memid;
+ void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid);
+ if (p==NULL || pages_reserved==0) {
+ _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
+ return ENOMEM;
+ }
+ _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);
+
+ if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) {
+ _mi_os_free(p, hsize, memid, &_mi_stats_main);
+ return ENOMEM;
+ }
+ return 0;
+}
+
+int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
+ return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL);
+}
+
+// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)
+int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept {
+ if (pages == 0) return 0;
+
+ // pages per numa node
+ size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
+  if (numa_count == 0) numa_count = 1;
+ const size_t pages_per = pages / numa_count;
+ const size_t pages_mod = pages % numa_count;
+ const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
+
+ // reserve evenly among numa nodes
+ for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
+ size_t node_pages = pages_per; // can be 0
+ if (numa_node < pages_mod) node_pages++;
+ int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
+ if (err) return err;
+ if (pages < node_pages) {
+ pages = 0;
+ }
+ else {
+ pages -= node_pages;
+ }
+ }
+
+ return 0;
+}
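+// Worked example: reserving 10 pages over 4 numa nodes gives pages_per = 2 and pages_mod = 2,
+// so nodes 0 and 1 reserve 3 pages each and nodes 2 and 3 reserve 2 pages each (10 pages total).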
+
+int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
+ MI_UNUSED(max_secs);
+ _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
+ if (pages_reserved != NULL) *pages_reserved = 0;
+ int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
+ if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
+ return err;
+}
+
+
+
+/* -----------------------------------------------------------
+ Abandoned pages
+----------------------------------------------------------- */
+
+void mi_arena_page_abandon(mi_page_t* page) {
+ mi_assert_internal(mi_page_is_abandoned(page));
+  if (mi_page_is_full(page)) { /* todo: add to the abandoned full-page queue (see the blocks_abandoned comment above) */ }
+}
+
+
+
+/* -----------------------------------------------------------
+ Arena purge
+----------------------------------------------------------- */
+
+static long mi_arena_purge_delay(void) {
+ // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay
+ return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult));
+}
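+// For example, a purge_delay of 10 (ms) combined with an arena_purge_mult of 10 yields a
+// 100 ms delay, while a negative purge_delay disables arena purging altogether.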
+
+// reset or decommit in an arena and update the committed/decommit bitmaps
+// assumes we own the area (i.e. blocks_free is claimed by us)
+static void mi_arena_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats) {
+ mi_assert_internal(!arena->memid.is_pinned);
+ const size_t size = mi_size_of_blocks(blocks);
+ void* const p = mi_arena_block_start(arena, block_idx);
+ bool needs_recommit;
+ if (mi_bitmap_is_xsetN(MI_BIT_SET, &arena->blocks_committed, block_idx, blocks)) {
+ // all blocks are committed, we can purge freely
+ needs_recommit = _mi_os_purge(p, size, stats);
+ }
+ else {
+ // some blocks are not committed -- this can happen when a partially committed block is freed
+ // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
+ // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
+ // and also undo the decommit stats (as it was already adjusted)
+ mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
+ needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats);
+ if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); }
+ }
+
+ // clear the purged blocks
+ mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_purge, blocks, block_idx, NULL);
+
+ // update committed bitmap
+ if (needs_recommit) {
+ mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->blocks_committed, blocks, block_idx, NULL);
+ }
+}
+
+
+// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls.
+// Note: assumes we (still) own the area as we may purge immediately
+static void mi_arena_schedule_purge(mi_arena_t* arena, size_t block_idx, size_t blocks, mi_stats_t* stats) {
+ const long delay = mi_arena_purge_delay();
+ if (delay < 0) return; // is purging allowed at all?
+
+ if (_mi_preloading() || delay == 0) {
+ // decommit directly
+ mi_arena_purge(arena, block_idx, blocks, stats);
+ }
+ else {
+ // schedule decommit
+ _mi_error_message(EFAULT, "purging not yet implemented\n");
+ }
+}
+
+
+static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats) {
+ if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled
+
+ const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count);
+ if (max_arena == 0) return;
+
+ _mi_error_message(EFAULT, "purging not yet implemented\n");
+ MI_UNUSED(stats);
+ MI_UNUSED(visit_all);
+ MI_UNUSED(force);
+}
+
+
+#if 0
+
#define MI_IN_ARENA_C
#include "arena-abandon.c"
#undef MI_IN_ARENA_C
@@ -116,12 +900,12 @@ static size_t mi_block_count_of_size(size_t size) {
return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}
-static size_t mi_arena_block_size(size_t bcount) {
+static size_t mi_size_of_blocks(size_t bcount) {
return (bcount * MI_ARENA_BLOCK_SIZE);
}
static size_t mi_arena_size(mi_arena_t* arena) {
- return mi_arena_block_size(arena->block_count);
+ return mi_size_of_blocks(arena->block_count);
}
static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) {
@@ -207,7 +991,7 @@ void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) {
}
void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) {
- return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex)));
+ return (arena->start + mi_size_of_blocks(mi_bitmap_index_bit(bindex)));
}
@@ -216,7 +1000,7 @@ void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) {
----------------------------------------------------------- */
// claim the `blocks_inuse` bits
-static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
+static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, size_t block_idx, mi_stats_t* stats)
{
size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter
if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx, stats)) {
@@ -268,7 +1052,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
_mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
if (any_uncommitted) {
bool commit_zero = false;
- if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) {
+ if (!_mi_os_commit(p, mi_size_of_blocks(needed_bcount), &commit_zero, tld->stats)) {
memid->initially_committed = false;
}
else {
@@ -293,7 +1077,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no
const size_t bcount = mi_block_count_of_size(size);
const size_t arena_index = mi_arena_id_index(arena_id);
mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
- mi_assert_internal(size <= mi_arena_block_size(bcount));
+ mi_assert_internal(size <= mi_size_of_blocks(bcount));
// Check arena suitability
mi_arena_t* arena = mi_arena_from_index(arena_index);
@@ -439,7 +1223,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
if (arena_index >= MI_MAX_ARENAS) return NULL;
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
if (arena == NULL) return NULL;
- if (size != NULL) { *size = mi_arena_block_size(arena->block_count); }
+ if (size != NULL) { *size = mi_size_of_blocks(arena->block_count); }
return arena->start;
}
@@ -459,7 +1243,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks,
mi_assert_internal(arena->blocks_committed != NULL);
mi_assert_internal(arena->blocks_purge != NULL);
mi_assert_internal(!arena->memid.is_pinned);
- const size_t size = mi_arena_block_size(blocks);
+ const size_t size = mi_size_of_blocks(blocks);
void* const p = mi_arena_block_start(arena, bitmap_idx);
bool needs_recommit;
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
@@ -511,25 +1295,25 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
// purge a range of blocks
// return true if the full range was purged.
// assumes we own the area (i.e. blocks_in_use is claimed by us)
-static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) {
- const size_t endidx = startidx + bitlen;
- size_t bitidx = startidx;
+static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startseqx, size_t bitlen, size_t purge, mi_stats_t* stats) {
+ const size_t endidx = startseqx + bitlen;
+ size_t bitseqx = startseqx;
bool all_purged = false;
- while (bitidx < endidx) {
+ while (bitseqx < endidx) {
// count consecutive ones in the purge mask
size_t count = 0;
- while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) {
+ while (bitseqx + count < endidx && (purge & ((size_t)1 << (bitseqx + count))) != 0) {
count++;
}
if (count > 0) {
// found range to be purged
- const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx);
+ const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitseqx);
mi_arena_purge(arena, range_idx, count, stats);
if (count == bitlen) {
all_purged = true;
}
}
- bitidx += (count+1); // +1 to skip the zero bit (or end)
+ bitseqx += (count+1); // +1 to skip the zero bit (or end)
}
return all_purged;
}
@@ -551,16 +1335,16 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi
for (size_t i = 0; i < arena->field_count; i++) {
size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]);
if (purge != 0) {
- size_t bitidx = 0;
- while (bitidx < MI_BITMAP_FIELD_BITS) {
+ size_t bitseqx = 0;
+ while (bitseqx < MI_BITMAP_FIELD_BITS) {
// find consecutive range of ones in the purge mask
size_t bitlen = 0;
- while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) {
+ while (bitseqx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitseqx + bitlen))) != 0) {
bitlen++;
}
// temporarily claim the purge range as "in-use" to be thread-safe with allocation
// try to claim the longest range of corresponding in_use bits
- const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx);
+ const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitseqx);
while( bitlen > 0 ) {
if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) {
break;
@@ -571,15 +1355,15 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi
if (bitlen > 0) {
// read purge again now that we have the in_use bits
purge = mi_atomic_load_acquire(&arena->blocks_purge[i]);
- if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) {
+ if (!mi_arena_purge_range(arena, i, bitseqx, bitlen, purge, stats)) {
full_purge = false;
}
any_purged = true;
// release the claimed `in_use` bits again
_mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index);
}
- bitidx += (bitlen+1); // +1 to skip the zero (or end)
- } // while bitidx
+ bitseqx += (bitlen+1); // +1 to skip the zero (or end)
+ } // while bitseqx
} // purge != 0
}
// if not fully purged, make sure to purge again in the future
@@ -742,7 +1526,7 @@ bool _mi_arena_contains(const void* p) {
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
+ if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_size_of_blocks(arena->block_count) > (const uint8_t*)p) {
return true;
}
}
@@ -818,8 +1602,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
mi_assert_internal(post >= 0);
if (post > 0) {
// don't use leftover bits at the end
- mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
- _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
+ mi_bitmap_index_t postseqx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
+ _mi_bitmap_claim(arena->blocks_inuse, fields, post, postseqx, NULL);
}
return mi_arena_add(arena, arena_id, &_mi_stats_main);
@@ -986,3 +1770,4 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
}
+#endif
\ No newline at end of file
diff --git a/src/bitmap-old.c b/src/bitmap-old.c
new file mode 100644
index 00000000..3e6311dc
--- /dev/null
+++ b/src/bitmap-old.c
@@ -0,0 +1,419 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+Concurrent bitmap that can set/reset sequences of bits atomically,
+represented as an array of fields where each field is a machine word (`size_t`)
+
+There are two APIs; the standard one cannot have sequences that cross
+between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
+
+The `_across` postfixed functions do allow sequences that can cross over
+between the fields. (This is used in arena allocation)
+---------------------------------------------------------------------------- */
+
+#include "mimalloc.h"
+#include "mimalloc/internal.h"
+#include "mimalloc/bits.h"
+#include "bitmap.h"
+
+/* -----------------------------------------------------------
+ Bitmap definition
+----------------------------------------------------------- */
+
+// The bit mask for a given number of blocks at a specified bit index.
+static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
+ mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
+ mi_assert_internal(count > 0);
+ if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
+ if (count == 0) return 0;
+ return ((((size_t)1 << count) - 1) << bitidx);
+}
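+
+// Example (illustrative, not from the original source): with 64-bit fields,
+// mi_bitmap_mask_(3, 4) == 0x70, i.e. three consecutive bits starting at bit
+// index 4, while mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0) takes the early
+// return and yields MI_BITMAP_FIELD_FULL without an undefined full-width shift.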
+
+
+
+/* -----------------------------------------------------------
+ Claim a bit sequence atomically
+----------------------------------------------------------- */
+
+// Try to atomically claim a sequence of `count` bits in a single
+// field at `idx` in `bitmap`. Returns `true` on success.
+bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
+{
+ mi_assert_internal(bitmap_idx != NULL);
+ mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
+ mi_bitmap_field_t* field = &bitmap[idx];
+ size_t map = mi_atomic_load_relaxed(field);
+ if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
+
+ // search for 0-bit sequence of length count
+ const size_t mask = mi_bitmap_mask_(count, 0);
+ const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
+
+#if MI_HAS_FAST_BITSCAN
+ size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
+#else
+ size_t bitidx = 0; // otherwise start at 0
+#endif
+ size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
+
+ // scan linearly for a free range of zero bits
+ while (bitidx <= bitidx_max) {
+ const size_t mapm = (map & m);
+ if (mapm == 0) { // are the mask bits free at bitidx?
+ mi_assert_internal((m >> bitidx) == mask); // no overflow?
+ const size_t newmap = (map | m);
+ mi_assert_internal((newmap^map) >> bitidx == mask);
+ if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
+ // no success, another thread claimed concurrently.. keep going (with updated `map`)
+ continue;
+ }
+ else {
+ // success, we claimed the bits!
+ *bitmap_idx = mi_bitmap_index_create(idx, bitidx);
+ return true;
+ }
+ }
+ else {
+ // on to the next bit range
+#if MI_HAS_FAST_BITSCAN
+ mi_assert_internal(mapm != 0);
+ const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
+ mi_assert_internal(shift > 0 && shift <= count);
+#else
+ const size_t shift = 1;
+#endif
+ bitidx += shift;
+ m <<= shift;
+ }
+ }
+ // no bits found
+ return false;
+}
+
+
+// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
+// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
+bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
+ size_t idx = start_field_idx;
+ for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
+ if (idx >= bitmap_fields) { idx = 0; } // wrap
+ if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+// Set `count` bits at `bitmap_idx` to 0 atomically
+// Returns `true` if all `count` bits were 1 previously.
+bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ const size_t idx = mi_bitmap_index_field(bitmap_idx);
+ const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
+ const size_t mask = mi_bitmap_mask_(count, bitidx);
+ mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
+ // mi_assert_internal((bitmap[idx] & mask) == mask);
+ const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
+ return ((prev & mask) == mask);
+}
+
+
+// Set `count` bits at `bitmap_idx` to 1 atomically
+// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
+bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
+ const size_t idx = mi_bitmap_index_field(bitmap_idx);
+ const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
+ const size_t mask = mi_bitmap_mask_(count, bitidx);
+ mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
+ //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
+ size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
+ if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
+ return ((prev & mask) == 0);
+}
+
+// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
+static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
+ const size_t idx = mi_bitmap_index_field(bitmap_idx);
+ const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
+ const size_t mask = mi_bitmap_mask_(count, bitidx);
+ mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
+ const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
+ if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
+ return ((field & mask) == mask);
+}
+
+// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
+// Returns `true` if successful when all previous `count` bits were 0.
+bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ const size_t idx = mi_bitmap_index_field(bitmap_idx);
+ const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
+ const size_t mask = mi_bitmap_mask_(count, bitidx);
+ mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
+ size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
+ do {
+ if ((expected & mask) != 0) return false;
+ }
+ while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
+ mi_assert_internal((expected & mask) == 0);
+ return true;
+}
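+
+// Note (added for clarity): unlike `_mi_bitmap_claim` above, which sets the bits
+// unconditionally with an atomic OR, this CAS loop fails without modifying the
+// bitmap as soon as any target bit is already set, and only retries when a
+// concurrent update changed other bits in the same field.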
+
+
+bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
+}
+
+bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ bool any_ones;
+ mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
+ return any_ones;
+}
+
+
+//--------------------------------------------------------------------------
+// the `_across` functions work on bitmaps where sequences can cross over
+// between the fields. This is used in arena allocation
+//--------------------------------------------------------------------------
+
+// Try to atomically claim a sequence of `count` bits starting from the field
+// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
+// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
+static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
+{
+ mi_assert_internal(bitmap_idx != NULL);
+
+ // check initial trailing zeros
+ mi_bitmap_field_t* field = &bitmap[idx];
+ size_t map = mi_atomic_load_relaxed(field);
+ const size_t initial = mi_clz(map); // count of initial zeros starting at idx
+ mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
+ if (initial == 0) return false;
+ if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
+ if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
+
+ // scan ahead
+ size_t found = initial;
+ size_t mask = 0; // mask bits for the final field
+ while(found < count) {
+ field++;
+ map = mi_atomic_load_relaxed(field);
+ const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
+ mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
+ mask = mi_bitmap_mask_(mask_bits, 0);
+ if ((map & mask) != 0) return false; // some part is already claimed
+ found += mask_bits;
+ }
+ mi_assert_internal(field < &bitmap[bitmap_fields]);
+
+ // we found a range of contiguous zeros up to the final field; mask contains mask in the final field
+ // now try to claim the range atomically
+ mi_bitmap_field_t* const final_field = field;
+ const size_t final_mask = mask;
+ mi_bitmap_field_t* const initial_field = &bitmap[idx];
+ const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
+ const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
+
+ // initial field
+ size_t newmap;
+ field = initial_field;
+ map = mi_atomic_load_relaxed(field);
+ do {
+ newmap = (map | initial_mask);
+ if ((map & initial_mask) != 0) { goto rollback; };
+ } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
+
+ // intermediate fields
+ while (++field < final_field) {
+ newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
+ map = 0;
+ if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
+ }
+
+ // final field
+ mi_assert_internal(field == final_field);
+ map = mi_atomic_load_relaxed(field);
+ do {
+ newmap = (map | final_mask);
+ if ((map & final_mask) != 0) { goto rollback; }
+ } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
+
+ // claimed!
+ mi_stat_counter_increase(stats->arena_crossover_count,1);
+ *bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
+ return true;
+
+rollback:
+ // roll back intermediate fields
+ // (we just failed to claim `field` so decrement first)
+ while (--field > initial_field) {
+ newmap = 0;
+ map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
+ mi_assert_internal(mi_atomic_load_relaxed(field) == map);
+ mi_atomic_store_release(field, newmap);
+ }
+ if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
+ map = mi_atomic_load_relaxed(field);
+ do {
+ mi_assert_internal((map & initial_mask) == initial_mask);
+ newmap = (map & ~initial_mask);
+ } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
+ }
+ mi_stat_counter_increase(stats->arena_rollback_count,1);
+ // retry? (we make a recursive call instead of goto to be able to use const declarations)
+ if (retries <= 2) {
+ return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
+ }
+ else {
+ return false;
+ }
+}
+
+
+// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
+// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
+bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
+ mi_assert_internal(count > 0);
+ if (count <= 2) {
+ // we don't bother with crossover fields for small counts
+ return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
+ }
+
+ // visit the fields
+ size_t idx = start_field_idx;
+ for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
+ if (idx >= bitmap_fields) { idx = 0; } // wrap
+ // first try to claim inside a field
+ /*
+ if (count <= MI_BITMAP_FIELD_BITS) {
+ if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
+ return true;
+ }
+ }
+ */
+ // if that fails, then try to claim across fields
+ if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Helper for masks across fields; returns the mid count, post_mask may be 0
+static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
+ MI_UNUSED(bitmap_fields);
+ const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
+ if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) {
+ *pre_mask = mi_bitmap_mask_(count, bitidx);
+ *mid_mask = 0;
+ *post_mask = 0;
+ mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields);
+ return 0;
+ }
+ else {
+ const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx;
+ mi_assert_internal(pre_bits < count);
+ *pre_mask = mi_bitmap_mask_(pre_bits, bitidx);
+ count -= pre_bits;
+ const size_t mid_count = (count / MI_BITMAP_FIELD_BITS);
+ *mid_mask = MI_BITMAP_FIELD_FULL;
+ count %= MI_BITMAP_FIELD_BITS;
+ *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0));
+ mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields);
+ return mid_count;
+ }
+}
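+
+// Worked example (illustrative): with 64-bit fields, a range of count==72 bits
+// whose in-field bit index is 60 splits into pre_mask covering bits 60..63
+// (4 bits), mid_count==1 full field, and post_mask covering the lowest 4 bits
+// of the field after that.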
+
+// Set `count` bits at `bitmap_idx` to 0 atomically
+// Returns `true` if all `count` bits were 1 previously.
+bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ size_t idx = mi_bitmap_index_field(bitmap_idx);
+ size_t pre_mask;
+ size_t mid_mask;
+ size_t post_mask;
+ size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
+ bool all_one = true;
+ mi_bitmap_field_t* field = &bitmap[idx];
+ size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
+ if ((prev & pre_mask) != pre_mask) all_one = false;
+ while(mid_count-- > 0) {
+ prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
+ if ((prev & mid_mask) != mid_mask) all_one = false;
+ }
+ if (post_mask!=0) {
+ prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
+ if ((prev & post_mask) != post_mask) all_one = false;
+ }
+ return all_one;
+}
+
+// Set `count` bits at `bitmap_idx` to 1 atomically
+// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
+ size_t idx = mi_bitmap_index_field(bitmap_idx);
+ size_t pre_mask;
+ size_t mid_mask;
+ size_t post_mask;
+ size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
+ bool all_zero = true;
+ bool any_zero = false;
+ _Atomic(size_t)*field = &bitmap[idx];
+ size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
+ if ((prev & pre_mask) != 0) all_zero = false;
+ if ((prev & pre_mask) != pre_mask) any_zero = true;
+ while (mid_count-- > 0) {
+ prev = mi_atomic_or_acq_rel(field++, mid_mask);
+ if ((prev & mid_mask) != 0) all_zero = false;
+ if ((prev & mid_mask) != mid_mask) any_zero = true;
+ }
+ if (post_mask!=0) {
+ prev = mi_atomic_or_acq_rel(field, post_mask);
+ if ((prev & post_mask) != 0) all_zero = false;
+ if ((prev & post_mask) != post_mask) any_zero = true;
+ }
+ if (pany_zero != NULL) { *pany_zero = any_zero; }
+ return all_zero;
+}
+
+
+// Returns `true` if all `count` bits were 1.
+// `any_ones` is `true` if there was at least one bit set to one.
+static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
+ size_t idx = mi_bitmap_index_field(bitmap_idx);
+ size_t pre_mask;
+ size_t mid_mask;
+ size_t post_mask;
+ size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
+ bool all_ones = true;
+ bool any_ones = false;
+ mi_bitmap_field_t* field = &bitmap[idx];
+ size_t prev = mi_atomic_load_relaxed(field++);
+ if ((prev & pre_mask) != pre_mask) all_ones = false;
+ if ((prev & pre_mask) != 0) any_ones = true;
+ while (mid_count-- > 0) {
+ prev = mi_atomic_load_relaxed(field++);
+ if ((prev & mid_mask) != mid_mask) all_ones = false;
+ if ((prev & mid_mask) != 0) any_ones = true;
+ }
+ if (post_mask!=0) {
+ prev = mi_atomic_load_relaxed(field);
+ if ((prev & post_mask) != post_mask) all_ones = false;
+ if ((prev & post_mask) != 0) any_ones = true;
+ }
+ if (pany_ones != NULL) { *pany_ones = any_ones; }
+ return all_ones;
+}
+
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
+}
+
+bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ bool any_ones;
+ mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
+ return any_ones;
+}
diff --git a/src/bitmap-old.h b/src/bitmap-old.h
new file mode 100644
index 00000000..f8898935
--- /dev/null
+++ b/src/bitmap-old.h
@@ -0,0 +1,110 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+Concurrent bitmap that can set/reset sequences of bits atomically,
+represented as an array of fields where each field is a machine word (`size_t`)
+
+There are two APIs; the standard one cannot have sequences that cross
+between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
+(this is used in region allocation)
+
+The `_across` postfixed functions do allow sequences that can cross over
+between the fields. (This is used in arena allocation)
+---------------------------------------------------------------------------- */
+#pragma once
+#ifndef MI_BITMAP_H
+#define MI_BITMAP_H
+
+/* -----------------------------------------------------------
+ Bitmap definition
+----------------------------------------------------------- */
+
+#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
+#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
+
+// An atomic bitmap of `size_t` fields
+typedef _Atomic(size_t) mi_bitmap_field_t;
+typedef mi_bitmap_field_t* mi_bitmap_t;
+
+// A bitmap index is the index of the bit in a bitmap.
+typedef size_t mi_bitmap_index_t;
+
+// Create a bit index.
+static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
+ mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
+ return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
+}
+static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
+ mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
+ return mi_bitmap_index_create_ex(idx,bitidx);
+}
+
+// Get the field index from a bit index.
+static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
+ return (bitmap_idx / MI_BITMAP_FIELD_BITS);
+}
+
+// Get the bit index in a bitmap field
+static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
+ return (bitmap_idx % MI_BITMAP_FIELD_BITS);
+}
+
+// Get the full bit index
+static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
+ return bitmap_idx;
+}
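+
+// Example (illustrative): with 64-bit fields, mi_bitmap_index_create(2, 5) == 133;
+// mi_bitmap_index_field(133) == 2 and mi_bitmap_index_bit_in_field(133) == 5
+// recover the field index and the in-field bit index again.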
+
+/* -----------------------------------------------------------
+ Claim a bit sequence atomically
+----------------------------------------------------------- */
+
+// Try to atomically claim a sequence of `count` bits in a single
+// field at `idx` in `bitmap`. Returns `true` on success.
+bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
+
+// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
+// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
+bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
+
+// Set `count` bits at `bitmap_idx` to 0 atomically
+// Returns `true` if all `count` bits were 1 previously.
+bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+
+// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
+// Returns `true` if successful when all previous `count` bits were 0.
+bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+
+// Set `count` bits at `bitmap_idx` to 1 atomically
+// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
+bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
+
+bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+
+
+//--------------------------------------------------------------------------
+// the `_across` functions work on bitmaps where sequences can cross over
+// between the fields. This is used in arena allocation
+//--------------------------------------------------------------------------
+
+// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
+// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
+bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);
+
+// Set `count` bits at `bitmap_idx` to 0 atomically
+// Returns `true` if all `count` bits were 1 previously.
+bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+
+// Set `count` bits at `bitmap_idx` to 1 atomically
+// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
+
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
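+
+/* Usage sketch (illustrative only, not part of the original header; `bitmap`,
+   `field_count`, `needed`, and `stats` are assumed to be supplied by the caller,
+   as in the arena allocator):
+
+     mi_bitmap_index_t bitmap_idx;
+     if (_mi_bitmap_try_find_from_claim_across(bitmap, field_count, 0, needed, &bitmap_idx, stats)) {
+       // `needed` consecutive bits are now 1 (claimed), possibly spanning fields
+       // ... use the claimed range ...
+       _mi_bitmap_unclaim_across(bitmap, field_count, needed, bitmap_idx); // release again
+     }
+*/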
+
+#endif
diff --git a/src/bitmap.c b/src/bitmap.c
index 3e6311dc..463d74c7 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -1,19 +1,12 @@
/* ----------------------------------------------------------------------------
-Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
+Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
-Concurrent bitmap that can set/reset sequences of bits atomically,
-represented as an array of fields where each field is a machine word (`size_t`)
-
-There are two api's; the standard one cannot have sequences that cross
-between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
-
-The `_across` postfixed functions do allow sequences that can cross over
-between the fields. (This is used in arena allocation)
+Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#include "mimalloc.h"
@@ -21,399 +14,586 @@ between the fields. (This is used in arena allocation)
#include "mimalloc/bits.h"
#include "bitmap.h"
-/* -----------------------------------------------------------
- Bitmap definition
------------------------------------------------------------ */
+/* --------------------------------------------------------------------------------
+ bfields
+-------------------------------------------------------------------------------- */
-// The bit mask for a given number of blocks at a specified bit index.
-static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
- mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
- mi_assert_internal(count > 0);
- if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
- if (count == 0) return 0;
- return ((((size_t)1 << count) - 1) << bitidx);
+static inline size_t mi_bfield_ctz(mi_bfield_t x) {
+ return mi_ctz(x);
+}
+
+static inline size_t mi_bfield_clz(mi_bfield_t x) {
+ return mi_clz(x);
+}
+
+// find the least significant bit that is set (i.e. count trailing zeros)
+// return false if `x==0` (with `*idx` undefined) and true otherwise,
+// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
+static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
+ return mi_bsf(x,idx);
+}
+
+static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
+ return mi_rotr(x,r);
+}
+
+// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
+static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
+ mi_assert_internal(idx < MI_BFIELD_BITS);
+ const mi_bfield_t mask = ((mi_bfield_t)1)<> bitidx) == mask); // no overflow?
- const size_t newmap = (map | m);
- mi_assert_internal((newmap^map) >> bitidx == mask);
- if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
- // no success, another thread claimed concurrently.. keep going (with updated `map`)
- continue;
+static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
+ mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
+ const size_t i = cidx / MI_BFIELD_BITS;
+ const size_t idx = cidx % MI_BFIELD_BITS;
+ return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
+}
+
+static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
+ mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
+ const size_t i = byte_idx / MI_BFIELD_SIZE;
+ const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
+ return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
+}
+
+// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
+static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
+ mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
+ mi_assert_internal(n>0);
+ bool all_transition = true;
+ bool all_already_xset = true;
+ size_t idx = cidx % MI_BFIELD_BITS;
+ size_t field = cidx / MI_BFIELD_BITS;
+ while (n > 0) {
+ size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
+ if (m > n) { m = n; }
+ mi_assert_internal(idx + m <= MI_BFIELD_BITS);
+ mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
+ const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1)<<idx);
+ bool already_xset;
+ all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
+ all_already_xset = all_already_xset && already_xset;
+ // next field
+ field++;
+ idx = 0;
+ n -= m;
+ }
+ *palready_xset = all_already_xset;
+ return all_transition;
+}
+
+// Check if a sequence of `n` bits within a chunk are all set/cleared.
+static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
+ mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
+ mi_assert_internal(n>0);
+ bool all_xset = true;
+ size_t idx = cidx % MI_BFIELD_BITS;
+ size_t field = cidx / MI_BFIELD_BITS;
+ while (n > 0) {
+ size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
+ if (m > n) { m = n; }
+ mi_assert_internal(idx + m <= MI_BFIELD_BITS);
+ mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
+ const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask);
+ // next field
+ field++;
+ idx = 0;
+ n -= m;
+ }
+ return all_xset;
+}
+
+// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
+// and false otherwise leaving all bit fields as is.
+static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
+ mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
+ mi_assert_internal(n>0);
+ if (n==0) return true;
+ size_t start_idx = cidx % MI_BFIELD_BITS;
+ size_t start_field = cidx / MI_BFIELD_BITS;
+ size_t end_field = MI_BITMAP_CHUNK_FIELDS;
+ size_t mask_mid = 0;
+ size_t mask_end = 0;
+
+ // first field
+ size_t field = start_field;
+ size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
+ if (m > n) { m = n; }
+ mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
+ mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
+ const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1)<<start_idx);
+ if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;
+
+ // done?
+ n -= m;
+ if (n==0) return true;
+
+ // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
+
+ // mid fields
+ while (n >= MI_BFIELD_BITS) {
+ field++;
+ mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
+ mask_mid = ~MI_ZU(0);
+ if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
+ n -= MI_BFIELD_BITS;
+ }
+
+ // last field
+ if (n > 0) {
+ mi_assert_internal(n < MI_BFIELD_BITS);
+ field++;
+ mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
+ end_field = field;
+ mask_end = (MI_ZU(1)<<n)-1;
+ if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
+ }
+
+ return true;
+
+restore:
+ // field is on the field that failed to set atomically; we need to restore all previous fields
+ mi_assert_internal(field > start_field);
+ while( field > start_field) {
+ field--;
+ const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
+ bool already_xset;
+ mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
+ }
+ return false;
+}
+
+
+// find least 1-bit in a chunk and try unset it atomically
+// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
+// todo: try neon version
+static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
+ #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
+ while(true) {
+ const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
+ if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
+ const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
+ const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
+ mi_assert_internal(mask != 0);
+ const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
+ mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
+ size_t cidx;
+ if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
+ if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
+ *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
+ mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
+ return true;
+ }
+ }
+ // try again
+ }
+ #else
+ size_t idx;
+ for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+ size_t idx;
+ if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
+ if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
+ *pidx = (i*MI_BFIELD_BITS + idx);
+ mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
+ return true;
+ }
+ }
+ }
+ return false;
+ #endif
+}
+
+
+// find least byte in a chunk with all bits set, and try unset it atomically
+// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
+// todo: try neon version
+static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
+ #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
+ while(true) {
+ const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
+ const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
+ const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
+ if (mask == 0) return false;
+ const size_t i = _tzcnt_u32(mask);
+ mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
+ const size_t chunk_idx = i / MI_BFIELD_SIZE;
+ const size_t byte_idx = i % MI_BFIELD_SIZE;
+ if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically
+ *pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
+ mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
+ return true;
+ }
+ // try again
+ }
+ #else
+ size_t idx;
+ for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+ const mi_bfield_t x = chunk->bfields[i];
+ // has_set8 has low bit in each byte set if the byte in x == 0xFF
+ const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F
+ (x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80
+ >> 7; // shift high bit to low bit
+ size_t idx;
+ if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
+ mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
+ mi_assert_internal((idx%8)==0);
+ const size_t byte_idx = idx/8;
+ if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically
+ *pidx = (i*MI_BFIELD_BITS) + idx;
+ mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
+ return true;
+ }
+ // else continue
+ }
+ }
+ return false;
+ #endif
+}
+
+
+// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically
+// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
+// todo: try avx2 and neon version
+// todo: allow spanning across bfield boundaries?
+static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
+ if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger?
+ const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
+ for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+ mi_bfield_t b = chunk->bfields[i];
+ size_t bshift = 0;
+ size_t idx;
+ while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
+ b >>= idx;
+ bshift += idx;
+ if (bshift + n >= MI_BFIELD_BITS) break;
+
+ if ((b&mask) == mask) { // found a match
+ mi_assert_internal( ((mask << bshift) >> bshift) == mask );
+ if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<bfields[i] >> bshift);
+ }
}
else {
- // success, we claimed the bits!
- *bitmap_idx = mi_bitmap_index_create(idx, bitidx);
- return true;
+ // advance
+ const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask)
+ mi_assert_internal(ones>0);
+ bshift += ones;
+ b >>= ones;
}
}
+ }
+ return false;
+}
+
+
+// are all bits in a bitmap chunk set?
+static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
+ #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
+ const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
+ return _mm256_test_all_ones(vec);
+ #else
+ // written like this for vectorization
+ mi_bfield_t x = chunk->bfields[0];
+ for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+ x = x & chunk->bfields[i];
+ }
+ return (~x == 0);
+ #endif
+}
+
+// are all bits in a bitmap chunk clear?
+static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
+ #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
+ const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
+ return _mm256_testz_si256( vec, vec );
+ #else
+ // written like this for vectorization
+ mi_bfield_t x = chunk->bfields[0];
+ for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+ x = x | chunk->bfields[i];
+ }
+ return (x == 0);
+ #endif
+}
+
+/* --------------------------------------------------------------------------------
+ bitmap
+-------------------------------------------------------------------------------- */
+// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
+void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
+ if (!already_zero) {
+ _mi_memzero_aligned(bitmap, sizeof(*bitmap));
+ }
+}
+
+// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
+void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
+ mi_assert_internal(n>0);
+ mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);
+
+ // first chunk
+ size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
+ const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+ size_t m = MI_BITMAP_CHUNK_BITS - cidx;
+ if (m > n) { m = n; }
+ bool already_xset;
+ mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);
+
+ // n can be large so use memset for efficiency for all in-between chunks
+ chunk_idx++;
+ n -= m;
+ const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
+ if (mid_chunks > 0) {
+ _mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), MI_BITMAP_CHUNK_BITS/8);
+ chunk_idx += mid_chunks;
+ n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
+ }
+
+ // last chunk
+ if (n > 0) {
+ mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
+ mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
+ mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
+ }
+}
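+
+// Example (illustrative, not from the original source; assumes the `MI_BIT_SET`
+// counterpart of the `MI_BIT_CLEAR` constant used elsewhere, and caller-provided
+// `bitmap` and `block_count`): marking an initial range as available before the
+// bitmap is shared with other threads:
+//
+//   mi_bitmap_unsafe_xsetN(MI_BIT_SET, &bitmap, 0, block_count);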
+
+
+// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
+// and false otherwise leaving the bitmask as is.
+bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
+ mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
+ const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
+ const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+ return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
+}
+
+// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
+// and false otherwise leaving the bitmask as is.
+bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
+ mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
+ mi_assert_internal(idx%8 == 0);
+ const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
+ const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
+ return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
+}
+
+// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
+// and false otherwise leaving the bitmask as is.
+// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
+bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
+ mi_assert_internal(n>0);
+ mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
+ if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
+ if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }
+
+ mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
+ const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
+ const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+ mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
+ if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
+ return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
+}
+
+// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
+// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
+bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
+ mi_assert_internal(n>0);
+ mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
+ bool local_already_xset;
+ if (already_xset==NULL) { already_xset = &local_already_xset; }
+ // if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
+ // if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
+
+ mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
+ const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
+ const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+ mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
+ if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
+ return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
+}
+
+// Is a sequence of n bits already all set/cleared?
+bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
+ mi_assert_internal(n>0);
+ mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
+ mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
+ const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
+ const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+ mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
+ if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
+ return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
+}
+
+
+#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
+ { size_t _set_idx; \
+ size_t _start = start % MI_BFIELD_BITS; \
+ mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
+ while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
+ decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;
+
+#define mi_bitmap_forall_set_chunks_end() \
+ _start += _set_idx+1; /* so chunk_idx stays valid */ \
+ _any_set >>= _set_idx; /* skip scanned bits (and avoid UB with (idx+1)) */ \
+ _any_set >>= 1; \
+ } \
+ }
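+
+// Illustrative usage of the macro pair above (it mirrors the functions below): the
+// first macro opens a loop over the chunks whose bit in `bitmap->any_set` is set,
+// and the second closes it again:
+//
+//   mi_bitmap_forall_set_chunks(bitmap, start, size_t chunk_idx)
+//   {
+//     // ... operate on bitmap->chunks[chunk_idx] ...
+//   }
+//   mi_bitmap_forall_set_chunks_end();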
+
+// Find a set bit in a bitmap and atomically unset it. Returns true on success,
+// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
+// The low `MI_BFIELD_BITS` bits of `start` are used to set the start point of the search
+// (to reduce thread contention).
+bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
+ mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
+ {
+ size_t cidx;
+ if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
+ *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
+ mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
+ return true;
+ }
else {
- // on to the next bit range
-#if MI_HAS_FAST_BITSCAN
- mi_assert_internal(mapm != 0);
- const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
- mi_assert_internal(shift > 0 && shift <= count);
-#else
- const size_t shift = 1;
-#endif
- bitidx += shift;
- m <<= shift;
- }
- }
- // no bits found
- return false;
-}
-
-
-// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
-// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
-bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
- size_t idx = start_field_idx;
- for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
- if (idx >= bitmap_fields) { idx = 0; } // wrap
- if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
- return true;
- }
- }
- return false;
-}
-
-
-// Set `count` bits at `bitmap_idx` to 0 atomically
-// Returns `true` if all `count` bits were 1 previously.
-bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
- const size_t idx = mi_bitmap_index_field(bitmap_idx);
- const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
- const size_t mask = mi_bitmap_mask_(count, bitidx);
- mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
- // mi_assert_internal((bitmap[idx] & mask) == mask);
- const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
- return ((prev & mask) == mask);
-}
-
-
-// Set `count` bits at `bitmap_idx` to 1 atomically
-// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
- const size_t idx = mi_bitmap_index_field(bitmap_idx);
- const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
- const size_t mask = mi_bitmap_mask_(count, bitidx);
- mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
- //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
- size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
- if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
- return ((prev & mask) == 0);
-}
-
-// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
-static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
- const size_t idx = mi_bitmap_index_field(bitmap_idx);
- const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
- const size_t mask = mi_bitmap_mask_(count, bitidx);
- mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
- const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
- if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
- return ((field & mask) == mask);
-}
-
-// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
-// Returns `true` if successful when all previous `count` bits were 0.
-bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
- const size_t idx = mi_bitmap_index_field(bitmap_idx);
- const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
- const size_t mask = mi_bitmap_mask_(count, bitidx);
- mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
- size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
- do {
- if ((expected & mask) != 0) return false;
- }
- while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
- mi_assert_internal((expected & mask) == 0);
- return true;
-}
-
-
-bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
- return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
-}
-
-bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
- bool any_ones;
- mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
- return any_ones;
-}
-
-
-//--------------------------------------------------------------------------
-// the `_across` functions work on bitmaps where sequences can cross over
-// between the fields. This is used in arena allocation
-//--------------------------------------------------------------------------
-
-// Try to atomically claim a sequence of `count` bits starting from the field
-// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
-// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
-static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
-{
- mi_assert_internal(bitmap_idx != NULL);
-
- // check initial trailing zeros
- mi_bitmap_field_t* field = &bitmap[idx];
- size_t map = mi_atomic_load_relaxed(field);
- const size_t initial = mi_clz(map); // count of initial zeros starting at idx
- mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
- if (initial == 0) return false;
- if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
- if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
-
- // scan ahead
- size_t found = initial;
- size_t mask = 0; // mask bits for the final field
- while(found < count) {
- field++;
- map = mi_atomic_load_relaxed(field);
- const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
- mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
- mask = mi_bitmap_mask_(mask_bits, 0);
- if ((map & mask) != 0) return false; // some part is already claimed
- found += mask_bits;
- }
- mi_assert_internal(field < &bitmap[bitmap_fields]);
-
- // we found a range of contiguous zeros up to the final field; mask contains mask in the final field
- // now try to claim the range atomically
- mi_bitmap_field_t* const final_field = field;
- const size_t final_mask = mask;
- mi_bitmap_field_t* const initial_field = &bitmap[idx];
- const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
- const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
-
- // initial field
- size_t newmap;
- field = initial_field;
- map = mi_atomic_load_relaxed(field);
- do {
- newmap = (map | initial_mask);
- if ((map & initial_mask) != 0) { goto rollback; };
- } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
-
- // intermediate fields
- while (++field < final_field) {
- newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
- map = 0;
- if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
- }
-
- // final field
- mi_assert_internal(field == final_field);
- map = mi_atomic_load_relaxed(field);
- do {
- newmap = (map | final_mask);
- if ((map & final_mask) != 0) { goto rollback; }
- } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
-
- // claimed!
- mi_stat_counter_increase(stats->arena_crossover_count,1);
- *bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
- return true;
-
-rollback:
- // roll back intermediate fields
- // (we just failed to claim `field` so decrement first)
- while (--field > initial_field) {
- newmap = 0;
- map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
- mi_assert_internal(mi_atomic_load_relaxed(field) == map);
- mi_atomic_store_release(field, newmap);
- }
- if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
- map = mi_atomic_load_relaxed(field);
- do {
- mi_assert_internal((map & initial_mask) == initial_mask);
- newmap = (map & ~initial_mask);
- } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
- }
- mi_stat_counter_increase(stats->arena_rollback_count,1);
- // retry? (we make a recursive call instead of goto to be able to use const declarations)
- if (retries <= 2) {
- return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
- }
- else {
- return false;
- }
-}
-
-
-// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
-// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
-bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
- mi_assert_internal(count > 0);
- if (count <= 2) {
- // we don't bother with crossover fields for small counts
- return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
- }
-
- // visit the fields
- size_t idx = start_field_idx;
- for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
- if (idx >= bitmap_fields) { idx = 0; } // wrap
- // first try to claim inside a field
- /*
- if (count <= MI_BITMAP_FIELD_BITS) {
- if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
- return true;
+ // we may find that all are unset only on a second iteration but that is ok as
+ // _any_set is a conservative approximation.
+ if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+ mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
- */
- // if that fails, then try to claim across fields
- if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
- return true;
- }
}
+ mi_bitmap_forall_set_chunks_end();
return false;
}
-// Helper for masks across fields; returns the mid count, post_mask may be 0
-static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
- MI_UNUSED(bitmap_fields);
- const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
- if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) {
- *pre_mask = mi_bitmap_mask_(count, bitidx);
- *mid_mask = 0;
- *post_mask = 0;
- mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields);
- return 0;
- }
- else {
- const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx;
- mi_assert_internal(pre_bits < count);
- *pre_mask = mi_bitmap_mask_(pre_bits, bitidx);
- count -= pre_bits;
- const size_t mid_count = (count / MI_BITMAP_FIELD_BITS);
- *mid_mask = MI_BITMAP_FIELD_FULL;
- count %= MI_BITMAP_FIELD_BITS;
- *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0));
- mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields);
- return mid_count;
+
+// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
+// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
+bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
+ mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
+ {
+ size_t cidx;
+ if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
+ *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
+ mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
+ mi_assert_internal((*pidx % 8) == 0);
+ return true;
+ }
+ else {
+ if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+ mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
+ }
+ }
}
+ mi_bitmap_forall_set_chunks_end();
+ return false;
}
-// Set `count` bits at `bitmap_idx` to 0 atomically
-// Returns `true` if all `count` bits were 1 previously.
-bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
- size_t idx = mi_bitmap_index_field(bitmap_idx);
- size_t pre_mask;
- size_t mid_mask;
- size_t post_mask;
- size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
- bool all_one = true;
- mi_bitmap_field_t* field = &bitmap[idx];
- size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
- if ((prev & pre_mask) != pre_mask) all_one = false;
- while(mid_count-- > 0) {
- prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
- if ((prev & mid_mask) != mid_mask) all_one = false;
+// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
+// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
+bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
+ // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
+ // TODO: allow spanning across chunk boundaries
+ if (n == 0 || n > MI_BFIELD_BITS) return false;
+ mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
+ {
+ size_t cidx;
+ if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
+ *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
+ mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
+ return true;
+ }
+ else {
+ if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+ mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
+ }
+ }
}
- if (post_mask!=0) {
- prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
- if ((prev & post_mask) != post_mask) all_one = false;
- }
- return all_one;
-}
-
-// Set `count` bits at `bitmap_idx` to 1 atomically
-// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
- size_t idx = mi_bitmap_index_field(bitmap_idx);
- size_t pre_mask;
- size_t mid_mask;
- size_t post_mask;
- size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
- bool all_zero = true;
- bool any_zero = false;
- _Atomic(size_t)*field = &bitmap[idx];
- size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
- if ((prev & pre_mask) != 0) all_zero = false;
- if ((prev & pre_mask) != pre_mask) any_zero = true;
- while (mid_count-- > 0) {
- prev = mi_atomic_or_acq_rel(field++, mid_mask);
- if ((prev & mid_mask) != 0) all_zero = false;
- if ((prev & mid_mask) != mid_mask) any_zero = true;
- }
- if (post_mask!=0) {
- prev = mi_atomic_or_acq_rel(field, post_mask);
- if ((prev & post_mask) != 0) all_zero = false;
- if ((prev & post_mask) != post_mask) any_zero = true;
- }
- if (pany_zero != NULL) { *pany_zero = any_zero; }
- return all_zero;
-}
-
-
-// Returns `true` if all `count` bits were 1.
-// `any_ones` is `true` if there was at least one bit set to one.
-static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
- size_t idx = mi_bitmap_index_field(bitmap_idx);
- size_t pre_mask;
- size_t mid_mask;
- size_t post_mask;
- size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
- bool all_ones = true;
- bool any_ones = false;
- mi_bitmap_field_t* field = &bitmap[idx];
- size_t prev = mi_atomic_load_relaxed(field++);
- if ((prev & pre_mask) != pre_mask) all_ones = false;
- if ((prev & pre_mask) != 0) any_ones = true;
- while (mid_count-- > 0) {
- prev = mi_atomic_load_relaxed(field++);
- if ((prev & mid_mask) != mid_mask) all_ones = false;
- if ((prev & mid_mask) != 0) any_ones = true;
- }
- if (post_mask!=0) {
- prev = mi_atomic_load_relaxed(field);
- if ((prev & post_mask) != post_mask) all_ones = false;
- if ((prev & post_mask) != 0) any_ones = true;
- }
- if (pany_ones != NULL) { *pany_ones = any_ones; }
- return all_ones;
-}
-
-bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
- return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
-}
-
-bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
- bool any_ones;
- mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
- return any_ones;
+ mi_bitmap_forall_set_chunks_end();
+ return false;
}
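
For reference, this is the kind of scan `mi_bitmap_chunk_find_and_try_clearN` performs inside a single bfield: skip runs of zeros with a trailing-zero count, test an `n`-bit mask, and on a mismatch skip the too-short run of ones. A standalone, non-atomic sketch assuming a 64-bit field:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Return true and set *pidx to the start of the lowest run of `n` consecutive
// 1-bits in `x` (1 <= n <= 64). Illustrative only; the bitmap code does the same
// scan per bfield and then clears the run atomically.
static bool find_run_of_set_bits(uint64_t x, size_t n, size_t* pidx) {
  if (n == 0 || n > 64) return false;
  const uint64_t mask = (n == 64 ? ~UINT64_C(0) : (UINT64_C(1) << n) - 1);
  uint64_t b = x;
  size_t shift = 0;
  while (b != 0) {
    const size_t zeros = (size_t)__builtin_ctzll(b);  // skip the 0-bits below the next 1-bit
    b >>= zeros;
    shift += zeros;
    if (shift + n > 64) return false;                 // a run of `n` can no longer fit
    if ((b & mask) == mask) {                         // found `n` consecutive 1-bits
      *pidx = shift;
      return true;
    }
    const size_t ones = (size_t)__builtin_ctzll(~b);  // skip the too-short run of 1-bits
    b >>= ones;
    shift += ones;
  }
  return false;
}
```

The bitmap version additionally clears the found run atomically and retries the search if that atomic update fails.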
diff --git a/src/bitmap.h b/src/bitmap.h
index f8898935..198a2902 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -6,105 +6,87 @@ terms of the MIT license. A copy of the license can be found in the file
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
-Concurrent bitmap that can set/reset sequences of bits atomically,
-represented as an array of fields where each field is a machine word (`size_t`)
-
-There are two api's; the standard one cannot have sequences that cross
-between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
-(this is used in region allocation)
-
-The `_across` postfixed functions do allow sequences that can cross over
-between the fields. (This is used in arena allocation)
+Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H
-/* -----------------------------------------------------------
- Bitmap definition
------------------------------------------------------------ */
+/* --------------------------------------------------------------------------------
+ Definitions
+-------------------------------------------------------------------------------- */
-#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
-#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
+typedef size_t mi_bfield_t;
-// An atomic bitmap of `size_t` fields
-typedef _Atomic(size_t) mi_bitmap_field_t;
-typedef mi_bitmap_field_t* mi_bitmap_t;
+#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
+#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
+#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
+#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
+#define MI_BFIELD_LO_BIT8 ((~((mi_bfield_t)0))/0xFF) // 0x01010101 ..
+#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
-// A bitmap index is the index of the bit in a bitmap.
-typedef size_t mi_bitmap_index_t;
+#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
+#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
-// Create a bit index.
-static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
- mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
- return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
-}
-static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
- mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
- return mi_bitmap_index_create_ex(idx,bitidx);
-}
-
-// Get the field index from a bit index.
-static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
- return (bitmap_idx / MI_BITMAP_FIELD_BITS);
-}
-
-// Get the bit index in a bitmap field
-static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
- return (bitmap_idx % MI_BITMAP_FIELD_BITS);
-}
-
-// Get the full bit index
-static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
- return bitmap_idx;
-}
-
-/* -----------------------------------------------------------
- Claim a bit sequence atomically
------------------------------------------------------------ */
-
-// Try to atomically claim a sequence of `count` bits in a single
-// field at `idx` in `bitmap`. Returns `true` on success.
-bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
-
-// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
-// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
-bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
-
-// Set `count` bits at `bitmap_idx` to 0 atomically
-// Returns `true` if all `count` bits were 1 previously.
-bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
-
-// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
-// Returns `true` if successful when all previous `count` bits were 0.
-bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
-
-// Set `count` bits at `bitmap_idx` to 1 atomically
-// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
-
-bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
-bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
+ _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
+} mi_bitmap_chunk_t;
-//--------------------------------------------------------------------------
-// the `_across` functions work on bitmaps where sequences can cross over
-// between the fields. This is used in arena allocation
-//--------------------------------------------------------------------------
+typedef mi_decl_align(32) struct mi_bitmap_s {
+ mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
+ _Atomic(mi_bfield_t)any_set;
+} mi_bitmap_t;
-// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
-// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
-bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);
+#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
-// Set `count` bits at `bitmap_idx` to 0 atomically
-// Returns `true` if all `count` bits were 1 previously.
-bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+/* --------------------------------------------------------------------------------
+ Bitmap
+-------------------------------------------------------------------------------- */
-// Set `count` bits at `bitmap_idx` to 1 atomically
-// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
+typedef bool mi_bit_t;
+#define MI_BIT_SET (true)
+#define MI_BIT_CLEAR (false)
-bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
-bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
+void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);
-#endif
+// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
+void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
+
+// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
+// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
+// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
+bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);
+
+// Is a sequence of n bits already all set/cleared?
+bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
+
+// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
+// and false otherwise leaving the bitmask as is.
+mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
+
+// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
+// and false otherwise leaving the bitmask as is.
+mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
+
+// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
+// and false otherwise leaving the bitmask as is.
+// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
+mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
+
+// Find a set bit in a bitmap and atomically unset it. Returns true on success,
+// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
+// The low bits of `start` (modulo `MI_BFIELD_BITS`) set the start point of the search
+// (to reduce thread contention).
+mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);
+
+// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
+// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
+mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx );
+
+// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
+// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
+mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx );
+
+#endif // MI_BITMAP_H
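
The `MI_BFIELD_LO_BIT8`/`MI_BFIELD_HI_BIT8` constants enable a branch-free test for a fully set byte inside a bfield (used by the non-AVX2 path of the byte search). A standalone sketch of the trick written with plain `uint64_t`, so it can be compiled and checked on its own:

```c
#include <stdint.h>
#include <stdio.h>

#define LO_BIT8  (~UINT64_C(0) / 0xFF)   // 0x0101010101010101
#define HI_BIT8  (LO_BIT8 << 7)          // 0x8080808080808080

// Nonzero iff `x` contains a 0xFF byte; the lowest set bit of the result lies
// in the lowest such byte, so ctz(result)/8 is that byte's index.
static uint64_t has_byte_all_set(uint64_t x) {
  return ((~x - LO_BIT8) & (x & HI_BIT8)) >> 7;
}

int main(void) {
  const uint64_t x = UINT64_C(0x00FF00000000FF00);
  const uint64_t h = has_byte_all_set(x);
  printf("lowest 0xFF byte at index %d\n", (int)(__builtin_ctzll(h) / 8));  // prints 1
  return 0;
}
```

False positives can only appear above the lowest 0xFF byte, so taking the least significant set bit of the result is always correct.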
diff --git a/src/free.c b/src/free.c
index f2e30b65..e1cc9276 100644
--- a/src/free.c
+++ b/src/free.c
@@ -24,7 +24,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
// ------------------------------------------------------
// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
-static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
+static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_block_t* block);
// regular free of a (thread local) block pointer
// fast path written carefully to prevent spilling on the stack
@@ -57,7 +57,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
mi_assert_internal(page!=NULL && p!=NULL);
- size_t diff = (uint8_t*)p - page->page_start;
+ size_t diff = (uint8_t*)p - mi_page_start(page);
size_t adjust;
if mi_likely(page->block_size_shift != 0) {
adjust = diff & (((size_t)1 << page->block_size_shift) - 1);
@@ -82,72 +82,55 @@ static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, vo
#endif
// free a local pointer (page parameter comes first for better codegen)
-static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
- MI_UNUSED(segment);
+static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_attr_noexcept {
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
mi_block_check_unguard(page, block, p);
mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
}
// free a pointer owned by another thread (page parameter comes first for better codegen)
-static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
+static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept {
mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
mi_block_check_unguard(page, block, p);
- mi_free_block_mt(page, segment, block);
+ mi_free_block_mt(page, block);
}
// generic free (for runtime integration)
-void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
- if (is_local) mi_free_generic_local(page,segment,p);
- else mi_free_generic_mt(page,segment,p);
+void mi_decl_noinline _mi_free_generic(mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
+ if (is_local) mi_free_generic_local(page,p);
+ else mi_free_generic_mt(page,p);
}
// Get the segment data belonging to a pointer
// This is just a single `and` in release mode but does further checks in debug mode
// (and secure mode) to see if this was a valid pointer.
-static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
+static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
{
- MI_UNUSED(msg);
-
- #if (MI_DEBUG>0)
+ MI_UNUSED_RELEASE(msg);
+ #if MI_DEBUG
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
return NULL;
}
#endif
-
- mi_segment_t* const segment = _mi_ptr_segment(p);
- if mi_unlikely(segment==NULL) return segment;
-
- #if (MI_DEBUG>0)
- if mi_unlikely(!mi_is_in_heap_region(p)) {
- _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
- "(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
- if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
- _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
- }
+ mi_page_t* const page = _mi_ptr_page(p);
+ #if MI_DEBUG
+ if (page == MI_PAGE_PTR_INVALID) {
+ _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
}
#endif
- #if (MI_DEBUG>0 || MI_SECURE>=4)
- if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
- _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
- return NULL;
- }
- #endif
-
- return segment;
+ return page;
}
// Free a block
// Fast path written carefully to prevent register spilling on the stack
void mi_free(void* p) mi_attr_noexcept
{
- mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
- if mi_unlikely(segment==NULL) return;
-
- const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
- mi_page_t* const page = _mi_segment_page_of(segment, p);
+ mi_page_t* const page = mi_checked_ptr_page(p,"mi_free");
+ if mi_unlikely(page==NULL) return;
+
+ const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
if mi_likely(is_local) { // thread-local free?
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
// thread-local, aligned, and not a full page
@@ -156,12 +139,12 @@ void mi_free(void* p) mi_attr_noexcept
}
else {
// page is full or contains (inner) aligned blocks; use generic path
- mi_free_generic_local(page, segment, p);
+ mi_free_generic_local(page, p);
}
}
else {
// not thread-local; use generic path
- mi_free_generic_mt(page, segment, p);
+ mi_free_generic_mt(page, p);
}
}
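
The fast path tests `page->flags.full_aligned == 0`, i.e. "not in the full queue and no aligned blocks", with a single zero compare. A hedged sketch of that flag-packing idea; the layout below is illustrative and not necessarily mimalloc's exact definition:

```c
#include <stdbool.h>
#include <stdint.h>

// Two rarely-set conditions packed into one byte, so the common case
// ("neither is set") is a single zero test.
typedef union toy_page_flags_u {
  uint8_t full_aligned;
  struct {
    uint8_t in_full : 1;
    uint8_t has_aligned : 1;
  } x;
} toy_page_flags_t;

bool can_take_fast_path(const toy_page_flags_t* flags) {
  return (flags->full_aligned == 0);   // not full and no aligned blocks
}
```

Packing both conditions into one byte keeps the common thread-local free down to a load, one compare, and the free-list push.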
@@ -169,10 +152,8 @@ void mi_free(void* p) mi_attr_noexcept
bool _mi_free_delayed_block(mi_block_t* block) {
// get segment and page
mi_assert_internal(block!=NULL);
- const mi_segment_t* const segment = _mi_ptr_segment(block);
- mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
- mi_assert_internal(_mi_thread_id() == segment->thread_id);
- mi_page_t* const page = _mi_segment_page_of(segment, block);
+ mi_page_t* const page = mi_checked_ptr_page(block,"_mi_free_delayed_block");
+ mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
// Clear the no-delayed flag so delayed freeing is used again for this page.
// This must be done before collecting the free lists on this page -- otherwise
@@ -242,20 +223,19 @@ static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block
}
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
-static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
+static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
- // first see if the segment was abandoned and if we can reclaim it into our thread
- if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 &&
- #if MI_HUGE_PAGE_ABANDON
- segment->page_kind != MI_PAGE_HUGE &&
- #endif
- mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned?
+ // first see if the page was abandoned and if we can reclaim it into our thread
+ if (mi_page_is_abandoned(page) &&
+ (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 ||
+ mi_page_is_singleton(page) // only one block, and we are free-ing it
+ ) &&
      mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initialized (issue #944))
{
- // the segment is abandoned, try to reclaim it into our heap
- if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
- mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
- mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
+ // the page is abandoned, try to reclaim it into our heap
+ if (_mi_heap_try_reclaim(mi_heap_get_default(), page)) { // TODO: avoid putting it in the full free queue
+ mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
+ // mi_assert_internal(mi_heap_get_default()->tld->subproc == page->subproc);
mi_free(block); // recursively free as now it will be a local free in our heap
return;
}
@@ -272,17 +252,12 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));
- if (segment->page_kind == MI_PAGE_HUGE) {
- #if MI_HUGE_PAGE_ABANDON
- // huge page segments are always abandoned and can be freed immediately
- _mi_segment_huge_page_free(segment, page, block);
- return;
- #else
+ if (mi_page_is_huge(page)) {
+ mi_assert_internal(mi_page_is_singleton(page));
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
- _mi_segment_huge_page_reset(segment, page, block);
- #endif
+ _mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL); // resets conservatively
}
else {
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
@@ -316,9 +291,8 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p
}
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
- const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
- if mi_unlikely(segment==NULL) return 0;
- const mi_page_t* const page = _mi_segment_page_of(segment, p);
+ const mi_page_t* const page = mi_checked_ptr_page(p,msg);
+ if mi_unlikely(page==NULL) return 0;
if mi_likely(!mi_page_has_aligned(page)) {
const mi_block_t* block = (const mi_block_t*)p;
return mi_page_usable_size_of(page, block);
@@ -514,20 +488,20 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
-#if (MI_STAT < 2)
+ #if (MI_STAT < 2)
MI_UNUSED(block);
-#endif
+ #endif
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_usable_block_size(page);
-#if (MI_STAT>1)
+ #if (MI_STAT>1)
const size_t usize = mi_page_usable_size_of(page, block);
mi_heap_stat_decrease(heap, malloc, usize);
-#endif
- if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
+ #endif
+ if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_decrease(heap, normal, bsize);
-#if (MI_STAT > 1)
+ #if (MI_STAT > 1)
mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
-#endif
+ #endif
}
else {
const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc
diff --git a/src/heap.c b/src/heap.c
index 581b3f71..e4955ba7 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -7,11 +7,8 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
-#include "mimalloc/atomic.h"
#include "mimalloc/prim.h" // mi_prim_get_default_heap
-#include <string.h>      // memset, memcpy
-
#if defined(_MSC_VER) && (_MSC_VER < 1920)
#pragma warning(disable:4204) // non-constant aggregate initializer
#endif
@@ -258,7 +255,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
mi_assert_internal(mi_heap_is_initialized(heap));
// TODO: copy full empty heap instead?
- memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
+ _mi_memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
heap->thread_delayed_free = NULL;
heap->page_count = 0;
diff --git a/src/os.c b/src/os.c
index 36b167cb..83521766 100644
--- a/src/os.c
+++ b/src/os.c
@@ -59,6 +59,10 @@ size_t _mi_os_large_page_size(void) {
return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}
+size_t _mi_os_virtual_address_bits(void) {
+ return mi_os_mem_config.virtual_address_bits;
+}
+
bool _mi_os_use_large_page(size_t size, size_t alignment) {
// if we have access, check the size and alignment requirements
if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
@@ -103,58 +107,10 @@ static void* mi_align_down_ptr(void* p, size_t alignment) {
return (void*)_mi_align_down((uintptr_t)p, alignment);
}
-
-/* -----------------------------------------------------------
- aligned hinting
--------------------------------------------------------------- */
-
-// On systems with enough virtual address bits, we can do efficient aligned allocation by using
-// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
-// space (64TiB) we use this technique. (but see issue #939)
-#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
-static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
-
-// Return a MI_SEGMENT_SIZE aligned address that is probably available.
-// If this returns NULL, the OS will determine the address but on some OS's that may not be
-// properly aligned which can be more costly as it needs to be adjusted afterwards.
-// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
-// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
-// in the middle of the 2TiB - 6TiB address range (see issue #372))
-
-#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
-#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
-#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
-
-void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
-{
- if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
- if (mi_os_mem_config.virtual_address_bits < 46) return NULL; // < 64TiB virtual address space
- size = _mi_align_up(size, MI_SEGMENT_SIZE);
- if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
- #if (MI_SECURE>0)
- size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
- #endif
-
- uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
- if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
- uintptr_t init = MI_HINT_BASE;
- #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
- uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
- init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
- #endif
- uintptr_t expected = hint + size;
- mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
- hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
- }
- if (hint%try_alignment != 0) return NULL;
- return (void*)hint;
-}
-#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
MI_UNUSED(try_alignment); MI_UNUSED(size);
return NULL;
}
-#endif
/* -----------------------------------------------------------
@@ -380,12 +336,10 @@ void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
----------------------------------------------------------- */
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) {
- mi_assert(offset <= MI_SEGMENT_SIZE);
mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0);
*memid = _mi_memid_none();
if (stats == NULL) stats = &_mi_stats_main;
- if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) {
// regular aligned allocation
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats);
@@ -605,7 +559,6 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
#endif
}
end = start + size;
- mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
} while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
if (total_size != NULL) *total_size = size;
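
`_mi_os_virtual_address_bits` feeds the page-map reservation in `page-map.c` below: with one map byte per arena block, the reservation is `2^(vbits - block_shift)` bytes. A quick check of that arithmetic, assuming the 64 KiB block size implied by the "2 GiB for 128 TiB address space" comment (`MI_ARENA_BLOCK_SHIFT == 16` is an assumption here):

```c
#include <stdio.h>

int main(void) {
  const unsigned vbits = 47;            // capped value used by mi_page_map_init
  const unsigned block_shift = 16;      // assumed: 64 KiB blocks, one map byte each
  const unsigned long long map_size = 1ULL << (vbits - block_shift);
  printf("page map size: %llu bytes (= %llu GiB)\n", map_size, map_size >> 30);  // 2 GiB
  return 0;
}
```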
diff --git a/src/page-map.c b/src/page-map.c
new file mode 100644
index 00000000..d3fcef79
--- /dev/null
+++ b/src/page-map.c
@@ -0,0 +1,90 @@
+/*----------------------------------------------------------------------------
+Copyright (c) 2023-2024, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+#include "mimalloc.h"
+#include "mimalloc/internal.h"
+#include "bitmap.h"
+
+mi_decl_cache_align signed char* _mi_page_map = NULL;
+static bool mi_page_map_all_committed = false;
+static size_t mi_blocks_per_commit_bit = 1;
+static mi_memid_t mi_page_map_memid;
+static mi_bitmap_t mi_page_map_commit;
+
+static bool mi_page_map_init(void) {
+ size_t vbits = _mi_os_virtual_address_bits();
+ if (vbits >= 48) vbits = 47;
+ // 1 byte per block = 2 GiB for 128 TiB address space (48 bit = 256 TiB address space)
+ // 64 KiB for 4 GiB address space (on 32-bit)
+  const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_BLOCK_SHIFT));
+
+ const size_t min_commit_size = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
+ mi_blocks_per_commit_bit = mi_block_count_of_size(min_commit_size);
+
+ mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems
+ _mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 0, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
+ if (_mi_page_map==NULL) {
+ _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
+ return false;
+ }
+ if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
+    _mi_warning_message("the page map was committed but not zero initialized!\n");
+ _mi_memzero_aligned(_mi_page_map, page_map_size);
+ }
+ return true;
+}
+
+static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* block_count) {
+ size_t page_size;
+ *page_start = mi_page_area(page, &page_size);
+ if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE; } // furthest interior pointer
+ *block_count = mi_block_count_of_size(page_size);
+ return ((uintptr_t)*page_start >> MI_ARENA_BLOCK_SHIFT);
+}
+
+
+
+void _mi_page_map_register(mi_page_t* page) {
+ if mi_unlikely(_mi_page_map == NULL) {
+ if (!mi_page_map_init()) return;
+ }
+ uint8_t* page_start;
+ size_t block_count;
+ const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);
+
+ // is the page map area that contains the page address committed?
+ if (!mi_page_map_all_committed) {
+ const size_t commit_bit_count = _mi_divide_up(block_count, mi_blocks_per_commit_bit);
+ const size_t commit_bit_idx = idx / mi_blocks_per_commit_bit;
+ for (size_t i = 0; i < commit_bit_count; i++) { // per bit to avoid crossing over bitmap chunks
+ if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, commit_bit_idx + i, 1)) {
+ // this may race, in which case we do multiple commits (which is ok)
+        _mi_os_commit(_mi_page_map + ((commit_bit_idx + i)*mi_blocks_per_commit_bit), mi_blocks_per_commit_bit, NULL, NULL);
+ mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, commit_bit_idx + i, 1, NULL);
+ }
+ }
+ }
+
+ // set the offsets
+ for (int i = 0; i < block_count; i++) {
+ mi_assert_internal(i < 128);
+ _mi_page_map[idx + i] = (int8_t)(-i-1);
+ }
+}
+
+
+void _mi_page_map_unregister(mi_page_t* page) {
+ mi_assert_internal(_mi_page_map != NULL);
+
+ // get index and count
+ uint8_t* page_start;
+ size_t block_count;
+ const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);
+
+ // unset the offsets
+ _mi_memzero(_mi_page_map + idx, block_count);
+}
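
Given the `-i-1` offsets written by `_mi_page_map_register`, an interior pointer can be mapped back to the first registered block of its page with one table load and an add. The lookup itself (`_mi_ptr_page`) is not part of this diff, so the following is only a sketch derived from that encoding; the `toy_` names and the 64 KiB block shift are assumptions:

```c
#include <stddef.h>
#include <stdint.h>

#define TOY_BLOCK_SHIFT 16    // assumed block granularity of the page map (64 KiB)

extern int8_t* toy_page_map;  // one signed byte per block, 0 = not registered

// Map an interior pointer back to the first registered block of its page.
// With map[idx] == -i-1 for the i-th block of a page, idx + map[idx] + 1 is the
// page's first block, and 0 still means "unregistered".
void* toy_ptr_to_page(const void* p) {
  const uintptr_t idx = (uintptr_t)p >> TOY_BLOCK_SHIFT;
  const ptrdiff_t ofs = toy_page_map[idx];
  if (ofs == 0) return NULL;                          // address was never registered
  const ptrdiff_t block = (ptrdiff_t)idx + ofs + 1;   // first block of the page
  return (void*)((uintptr_t)block << TOY_BLOCK_SHIFT);
}
```

Entry value 0 doubles as "not registered", which is why the stored offsets start at -1 rather than 0.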
diff --git a/src/page.c b/src/page.c
index c681d6d0..a00ff615 100644
--- a/src/page.c
+++ b/src/page.c
@@ -59,7 +59,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) {
static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
size_t psize;
- uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
+ uint8_t* page_area = mi_page_area(page, &psize);
mi_block_t* start = (mi_block_t*)page_area;
mi_block_t* end = (mi_block_t*)(page_area + psize);
while(p != NULL) {
@@ -83,10 +83,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->capacity <= page->reserved);
// const size_t bsize = mi_page_block_size(page);
- mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = mi_page_start(page);
- mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
- mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
@@ -122,15 +119,11 @@ bool _mi_page_is_valid(mi_page_t* page) {
mi_assert_internal(page->keys[0] != 0);
#endif
if (mi_page_heap(page)!=NULL) {
- mi_segment_t* segment = _mi_page_segment(page);
- mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0);
- #if MI_HUGE_PAGE_ABANDON
- if (segment->page_kind != MI_PAGE_HUGE)
- #endif
+ mi_assert_internal(!_mi_process_is_initialized || page->thread_id == mi_page_heap(page)->thread_id || page->thread_id==0);
{
mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page));
- mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
+ mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_MAX_OBJ_SIZE || mi_page_is_in_full(page));
mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
}
}
@@ -274,16 +267,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
#if !MI_HUGE_PAGE_ABANDON
mi_assert_internal(pq != NULL);
mi_assert_internal(mi_heap_contains_queue(heap, pq));
- mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size);
+ mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size);
#endif
- mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os);
+ mi_page_t* page = _mi_heap_page_alloc(heap, block_size, page_alignment);
if (page == NULL) {
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
return NULL;
}
- #if MI_HUGE_PAGE_ABANDON
- mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
- #endif
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
// a fresh page was found, initialize it
const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
@@ -384,7 +374,6 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_heap_t* pheap = mi_page_heap(page);
// remove from our page list
- mi_segments_tld_t* segments_tld = &pheap->tld->segments;
mi_page_queue_remove(pq, page);
// page is no longer associated with our heap
@@ -399,8 +388,8 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
#endif
// and abandon it
- mi_assert_internal(mi_page_heap(page) == NULL);
- _mi_segment_page_abandon(page,segments_tld);
+ mi_assert_internal(mi_page_is_abandoned(page));
+ _mi_arena_page_abandon(page,&pheap->tld);
}
// force abandon a page
@@ -411,8 +400,7 @@ void _mi_page_force_abandon(mi_page_t* page) {
// ensure this page is no longer in the heap delayed free list
_mi_heap_delayed_free_all(heap);
- // We can still access the page meta-info even if it is freed as we ensure
- // in `mi_segment_force_abandon` that the segment is not freed (yet)
+ // TODO: can we still access the page meta-info even if it is freed?
if (page->capacity == 0) return; // it may have been freed now
// and now unlink it from the page queue and abandon (or free)
@@ -433,17 +421,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
+ mi_heap_t* pheap = mi_page_heap(page);
+
// no more aligned blocks in here
mi_page_set_has_aligned(page, false);
// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
- mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments;
mi_page_queue_remove(pq, page);
// and free it
mi_page_set_heap(page,NULL);
- _mi_segment_page_free(page, force, segments_tld);
+ _mi_arena_page_free(page, force, &pheap->tld);
}
#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE
@@ -474,7 +463,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
- page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
+ page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(pq >= heap->pages);
const size_t index = pq - heap->pages;
@@ -639,7 +628,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
size_t page_size;
//uint8_t* page_start =
- _mi_segment_page_start(_mi_page_segment(page), page, &page_size);
+ mi_page_area(page, &page_size);
mi_stat_counter_increase(tld->stats.pages_extended, 1);
// calculate the extend count
@@ -676,15 +665,13 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
mi_assert(page != NULL);
- mi_segment_t* segment = _mi_page_segment(page);
- mi_assert(segment != NULL);
mi_assert_internal(block_size > 0);
// set fields
mi_page_set_heap(page, heap);
page->block_size = block_size;
size_t page_size;
- page->page_start = _mi_segment_page_start(segment, page, &page_size);
- mi_track_mem_noaccess(page->page_start,page_size);
+ uint8_t* page_start = mi_page_area(page, &page_size);
+ mi_track_mem_noaccess(page_start,page_size);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
mi_assert_internal(page->reserved > 0);
@@ -692,15 +679,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
page->keys[0] = _mi_heap_random_next(heap);
page->keys[1] = _mi_heap_random_next(heap);
#endif
- page->free_is_zero = page->is_zero_init;
+ page->free_is_zero = page->memid.initially_zero;
#if MI_DEBUG>2
- if (page->is_zero_init) {
+ if (page->memid.initially_zero) {
mi_track_mem_defined(page->page_start, page_size);
- mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size));
+ mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
}
#endif
if (block_size > 0 && _mi_is_power_of_two(block_size)) {
- page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size));
+ page->block_size_shift = (uint8_t)mi_ctz(block_size);
}
else {
page->block_size_shift = 0;
@@ -734,13 +721,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
// search for a best next page to use for at most N pages (often cut short if immediate blocks are available)
#define MI_MAX_CANDIDATE_SEARCH (8)
-// is the page not yet used up to its reserved space?
-static bool mi_page_is_expandable(const mi_page_t* page) {
- mi_assert_internal(page != NULL);
- mi_assert_internal(page->capacity <= page->reserved);
- return (page->capacity < page->reserved);
-}
-
// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
@@ -907,7 +887,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
#if MI_HUGE_PAGE_ABANDON
mi_page_queue_t* pq = NULL;
#else
- mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size
+ mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_MAX_OBJ_SIZE+1); // always in the huge queue regardless of the block size
mi_assert_internal(mi_page_queue_is_huge(pq));
#endif
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
@@ -915,10 +895,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_is_huge(page));
- mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
- mi_assert_internal(_mi_page_segment(page)->used==1);
+ mi_assert_internal(mi_page_is_singleton(page));
#if MI_HUGE_PAGE_ABANDON
- mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
+ mi_assert_internal(mi_page_is_abandoned(page));
mi_page_set_heap(page, NULL);
#endif
mi_heap_stat_increase(heap, huge, mi_page_block_size(page));
@@ -933,7 +912,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
// huge allocation?
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
- if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
+ if mi_unlikely(req_size > (MI_LARGE_MAX_OBJ_SIZE - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;
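
`mi_page_init` above stores `block_size_shift = ctz(block_size)` for power-of-two block sizes, which lets `_mi_page_ptr_unalign` in `free.c` replace a modulo by a mask. A small standalone illustration of that equivalence:

```c
#include <assert.h>
#include <stdint.h>

// For power-of-two block sizes, `diff % block_size` equals `diff & (block_size-1)`,
// with the shift obtained once via count-trailing-zeros.
static size_t unalign_adjust(size_t diff, size_t block_size) {
  if (block_size != 0 && (block_size & (block_size - 1)) == 0) {
    const unsigned shift = (unsigned)__builtin_ctzll((unsigned long long)block_size);
    return diff & (((size_t)1 << shift) - 1);
  }
  return diff % block_size;   // generic fallback for non power-of-two sizes
}

int main(void) {
  assert(unalign_adjust(100, 32) == 100 % 32);
  assert(unalign_adjust(100, 24) == 100 % 24);
  return 0;
}
```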
diff --git a/src/static.c b/src/static.c
index 9e06ce05..b34d5d42 100644
--- a/src/static.c
+++ b/src/static.c
@@ -20,7 +20,7 @@ terms of the MIT license. A copy of the license can be found in the file
// containing the whole library. If it is linked first
// it will override all the standard library allocation
// functions (on Unix's).
-#include "alloc.c" // includes alloc-override.c
+#include "alloc.c" // includes alloc-override.c and free.c
#include "alloc-aligned.c"
#include "alloc-posix.c"
#include "arena.c"
@@ -31,6 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "options.c"
#include "os.c"
#include "page.c" // includes page-queue.c
+#include "page-map.c"
#include "random.c"
#include "segment.c"
#include "segment-map.c"
diff --git a/src/xbitmap.c b/src/xbitmap.c
deleted file mode 100644
index 68525c84..00000000
--- a/src/xbitmap.c
+++ /dev/null
@@ -1,599 +0,0 @@
-/* ----------------------------------------------------------------------------
-Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
-This is free software; you can redistribute it and/or modify it under the
-terms of the MIT license. A copy of the license can be found in the file
-"LICENSE" at the root of this distribution.
------------------------------------------------------------------------------*/
-
-/* ----------------------------------------------------------------------------
-Concurrent bitmap that can set/reset sequences of bits atomically
----------------------------------------------------------------------------- */
-
-#include "mimalloc.h"
-#include "mimalloc/internal.h"
-#include "mimalloc/bits.h"
-#include "xbitmap.h"
-
-/* --------------------------------------------------------------------------------
- bfields
--------------------------------------------------------------------------------- */
-
-static inline size_t mi_bfield_ctz(mi_bfield_t x) {
- return mi_ctz(x);
-}
-
-static inline size_t mi_bfield_clz(mi_bfield_t x) {
- return mi_clz(x);
-}
-
-// find the least significant bit that is set (i.e. count trailing zero's)
-// return false if `x==0` (with `*idx` undefined) and true otherwise,
-// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
-static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
- return mi_bsf(x,idx);
-}
-
-static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
- return mi_rotr(x,r);
-}
-
-// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
-static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
- mi_assert_internal(idx < MI_BFIELD_BITS);
-  const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
-  if (set) {
-    const mi_bfield_t old = mi_atomic_or_acq_rel(b, mask);
-    return ((old&mask) == 0);     // did the bit transition from 0 to 1?
-  }
-  else {
-    const mi_bfield_t old = mi_atomic_and_acq_rel(b, ~mask);
-    return ((old&mask) == mask);  // did the bit transition from 1 to 0?
-  }
-}
-
-// Try to set/clear a bit within a chunk.
-static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx) {
-  mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
-  const size_t i   = cidx / MI_BFIELD_BITS;
-  const size_t idx = cidx % MI_BFIELD_BITS;
-  return mi_bfield_atomic_try_xset(set, &chunk->bfields[i], idx);
-}
-
-static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
- mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
- const size_t i = byte_idx / MI_BFIELD_SIZE;
- const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
- return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
-}
-
-// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
-static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
- mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
- mi_assert_internal(n>0);
- bool all_transition = true;
- bool all_already_xset = true;
- size_t idx = cidx % MI_BFIELD_BITS;
- size_t field = cidx / MI_BFIELD_BITS;
- while (n > 0) {
- size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
- if (m > n) { m = n; }
- mi_assert_internal(idx + m <= MI_BFIELD_BITS);
- mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
-    const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
-    bool already_xset;
-    all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
- all_already_xset = all_already_xset && already_xset;
- // next field
- field++;
- idx = 0;
- n -= m;
- }
- *palready_xset = all_already_xset;
- return all_transition;
-}
-
-// Check if a sequence of `n` bits within a chunk are all set/cleared.
-static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
- mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
- mi_assert_internal(n>0);
- bool all_xset = true;
- size_t idx = cidx % MI_BFIELD_BITS;
- size_t field = cidx / MI_BFIELD_BITS;
- while (n > 0) {
- size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
- if (m > n) { m = n; }
- mi_assert_internal(idx + m <= MI_BFIELD_BITS);
- mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
-    const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
-    all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
- // next field
- field++;
- idx = 0;
- n -= m;
- }
- return all_xset;
-}
-
-// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
-// and false otherwise leaving all bit fields as is.
-static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
- mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
- mi_assert_internal(n>0);
- if (n==0) return true;
- size_t start_idx = cidx % MI_BFIELD_BITS;
- size_t start_field = cidx / MI_BFIELD_BITS;
- size_t end_field = MI_BITMAP_CHUNK_FIELDS;
- size_t mask_mid = 0;
- size_t mask_end = 0;
-
- // first field
- size_t field = start_field;
- size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
- if (m > n) { m = n; }
- mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
- mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
-  const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
-  if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;
-
- // done?
- n -= m;
- if (n==0) return true;
-
- // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
-
- // mid fields
- while (n >= MI_BFIELD_BITS) {
- field++;
- mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
- mask_mid = ~MI_ZU(0);
- if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
- n -= MI_BFIELD_BITS;
- }
-
- // last field
- if (n > 0) {
- mi_assert_internal(n < MI_BFIELD_BITS);
- field++;
- mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
- end_field = field;
-    mask_end = (MI_ZU(1)<<n)-1;
-    if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
- }
-
- return true;
-
-restore:
- // field is on the field that failed to set atomically; we need to restore all previous fields
- mi_assert_internal(field > start_field);
- while( field > start_field) {
- field--;
- const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
- bool already_xset;
- mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
- }
- return false;
-}
-
-
-// find least 1-bit in a chunk and try unset it atomically
-// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
-// todo: try neon version
-static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
- #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
- while(true) {
- const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
- if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
- const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
- const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
- mi_assert_internal(mask != 0);
- const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
- mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
- size_t cidx;
- if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
- if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
- *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
- mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
- return true;
- }
- }
- // try again
- }
- #else
- size_t idx;
- for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
- size_t idx;
- if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
- if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
- *pidx = (i*MI_BFIELD_BITS + idx);
- mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
- return true;
- }
- }
- }
- return false;
- #endif
-}
-
-
-// find least byte in a chunk with all bits set, and try unset it atomically
-// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
-// todo: try neon version
-static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
- #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
- while(true) {
- const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
- const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
- const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
- if (mask == 0) return false;
- const size_t i = _tzcnt_u32(mask);
- mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
- const size_t chunk_idx = i / MI_BFIELD_SIZE;
- const size_t byte_idx = i % MI_BFIELD_SIZE;
- if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically
- *pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
- mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
- return true;
- }
- // try again
- }
- #else
- size_t idx;
- for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
- const mi_bfield_t x = chunk->bfields[i];
- // has_set8 has low bit in each byte set if the byte in x == 0xFF
- const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F
- (x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80
- >> 7; // shift high bit to low bit
- size_t idx;
- if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
- mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
- mi_assert_internal((idx%8)==0);
- const size_t byte_idx = idx/8;
- if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically
- *pidx = (i*MI_BFIELD_BITS) + idx;
- mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
- return true;
- }
- // else continue
- }
- }
- return false;
- #endif
-}
-
-
-// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically
-// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
-// todo: try avx2 and neon version
-// todo: allow spanning across bfield boundaries?
-static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
- if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger?
- const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
- for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
- mi_bfield_t b = chunk->bfields[i];
- size_t bshift = 0;
- size_t idx;
- while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
- b >>= idx;
- bshift += idx;
- if (bshift + n >= MI_BFIELD_BITS) break;
-
- if ((b&mask) == mask) { // found a match
- mi_assert_internal( ((mask << bshift) >> bshift) == mask );
-        if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR, &chunk->bfields[i], mask<<bshift)) {
-          *pidx = (i*MI_BFIELD_BITS) + bshift;
-          mi_assert_internal(*pidx <= MI_BITMAP_CHUNK_BITS - n);
-          return true;
-        }
-        else {
-          // the atomic clear failed; reload the field and retry at this offset
-          b = (chunk->bfields[i] >> bshift);
- }
- }
- else {
- // advance
- const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask)
- mi_assert_internal(ones>0);
- bshift += ones;
- b >>= ones;
- }
- }
- }
- return false;
-}
-
-
-// are all bits in a bitmap chunk set?
-static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
- #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
- const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
- return _mm256_test_all_ones(vec);
- #else
- // written like this for vectorization
- mi_bfield_t x = chunk->bfields[0];
- for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
- x = x & chunk->bfields[i];
- }
- return (~x == 0);
- #endif
-}
-
-// are all bits in a bitmap chunk clear?
-static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
- #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
- const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
- return _mm256_testz_si256( vec, vec );
- #else
- // written like this for vectorization
- mi_bfield_t x = chunk->bfields[0];
- for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
- x = x | chunk->bfields[i];
- }
- return (x == 0);
- #endif
-}
-
-/* --------------------------------------------------------------------------------
- bitmap
--------------------------------------------------------------------------------- */
-// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
-void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
- if (!already_zero) {
- _mi_memzero_aligned(bitmap, sizeof(*bitmap));
- }
-}
-
-// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
-void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
- mi_assert_internal(n>0);
- mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);
-
- // first chunk
- size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
- const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
- size_t m = MI_BITMAP_CHUNK_BITS - cidx;
- if (m > n) { m = n; }
- bool already_xset;
- mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);
-
- // n can be large so use memset for efficiency for all in-between chunks
- chunk_idx++;
- n -= m;
- const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
- if (mid_chunks > 0) {
- _mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), mid_chunks * (MI_BITMAP_CHUNK_BITS/8));
- chunk_idx += mid_chunks;
- n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
- }
-
- // last chunk
- if (n > 0) {
- mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
- mi_assert_internal(chunk_idx < MI_BFIELD_BITS); // chunk_idx indexes into chunks[MI_BFIELD_BITS]
- mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
- }
-}
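
Worked example of the split performed above (assuming 256-bit chunks): setting n = 600 bits starting at idx = 300 touches a 212-bit tail of chunk 1, one full middle chunk handled by the memset path, and a 132-bit head of chunk 3. A standalone sketch that prints exactly this split:

#include <stdio.h>
#include <stddef.h>

#define CHUNK_BITS 256

int main(void) {
  size_t idx = 300, n = 600;            // example range: bits [300, 900)
  size_t chunk_idx = idx / CHUNK_BITS;  // = 1
  size_t cidx = idx % CHUNK_BITS;       // = 44
  size_t m = CHUNK_BITS - cidx;         // = 212 bits remain in the first chunk
  if (m > n) m = n;
  printf("first: chunk %zu, offset %zu, %zu bits\n", chunk_idx, cidx, m);
  chunk_idx++; n -= m;                              // n = 388
  size_t mid_chunks = n / CHUNK_BITS;               // = 1 full chunk (memset path)
  printf("middle: %zu full chunk(s) from chunk %zu\n", mid_chunks, chunk_idx);
  chunk_idx += mid_chunks; n -= mid_chunks * CHUNK_BITS;  // n = 132
  if (n > 0) printf("last: chunk %zu, %zu bits\n", chunk_idx, n);
  return 0;
}
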
-
-
-// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
-// and false otherwise leaving the bitmask as is.
-bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
- mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
- const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
- const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
- return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
-}
-
-// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
-// and false otherwise leaving the bitmask as is.
-bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
- mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
- mi_assert_internal(idx%8 == 0);
- const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
- const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
- return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
-}
-
-// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
-// and false otherwise leaving the bitmask as is.
-// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
-bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
- mi_assert_internal(n>0);
- mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
- if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
- if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }
-
- mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
- const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
- const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
- mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
- if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
- return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
-}
-
-// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
-// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
-bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
- mi_assert_internal(n>0);
- mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
- bool local_already_xset;
- if (already_xset==NULL) { already_xset = &local_already_xset; }
- // if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
- // if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
-
- mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
- const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
- const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
- mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
- if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
- return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
-}
-
-// Is a sequence of n bits already all set/cleared?
-bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
- mi_assert_internal(n>0);
- mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
- mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
- const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
- const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
- mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
- if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
- return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
-}
-
-
-#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
- { size_t _set_idx; \
- size_t _start = start % MI_BFIELD_BITS; \
- mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
- while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
- decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;
-
-#define mi_bitmap_forall_set_chunks_end() \
- _start += _set_idx+1; /* so chunk_idx stays valid */ \
- _any_set >>= _set_idx; /* skip scanned bits (and avoid UB with (idx+1)) */ \
- _any_set >>= 1; \
- } \
- }
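
The macro pair expands to a wrap-around scan over the set bits of the per-chunk summary word `any_set`, starting at a caller-chosen bit so concurrent searchers spread out. A rough, self-contained sketch of what a use of the pair expands to, for a 64-bit summary word (names and the __builtin_ctzll builtin are assumptions, not this diff's code):

#include <stdint.h>
#include <stdio.h>

static uint64_t rotr64(uint64_t x, unsigned r) {
  r &= 63;
  return (r == 0 ? x : (x >> r) | (x << (64 - r)));
}

// Visit every set bit of `any_set`, starting the scan at bit `start` and wrapping around.
static void forall_set_chunks(uint64_t any_set, unsigned start) {
  unsigned _start = start % 64;
  uint64_t _any = rotr64(any_set, _start);
  while (_any != 0) {
    unsigned set_idx   = (unsigned)__builtin_ctzll(_any);
    unsigned chunk_idx = (set_idx + _start) % 64;
    printf("visit chunk %u\n", chunk_idx);   // the macro's user-supplied loop body goes here
    _start += set_idx + 1;                   // keep the chunk_idx mapping valid
    _any >>= set_idx; _any >>= 1;            // two shifts avoid UB when set_idx+1 == 64
  }
}
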
-
-// Find a set bit in a bitmap and atomically unset it. Returns true on success,
-// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
- // The low `MI_BFIELD_BITS` bits of `start` are used to set the start point of the search
-// (to reduce thread contention).
-bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
- mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
- {
- size_t cidx;
- if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
- *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
- mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
- return true;
- }
- else {
- // we may find that all are unset only on a second iteration but that is ok as
- // _any_set is a conservative approximation.
- if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
- mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
- }
- }
- }
- mi_bitmap_forall_set_chunks_end();
- return false;
-}
-
-
-// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
-// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
-bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
- mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
- {
- size_t cidx;
- if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
- *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
- mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
- mi_assert_internal((*pidx % 8) == 0);
- return true;
- }
- else {
- if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
- mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
- }
- }
- }
- mi_bitmap_forall_set_chunks_end();
- return false;
-}
-
-// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
-// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
-bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
- // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
- // TODO: allow spanning across chunk boundaries
- if (n == 0 || n > MI_BFIELD_BITS) return false;
- mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
- {
- size_t cidx;
- if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
- *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
- mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
- return true;
- }
- else {
- if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
- mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
- }
- }
- }
- mi_bitmap_forall_set_chunks_end();
- return false;
-}
diff --git a/src/xbitmap.h b/src/xbitmap.h
deleted file mode 100644
index 869db2a2..00000000
--- a/src/xbitmap.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* ----------------------------------------------------------------------------
-Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
-This is free software; you can redistribute it and/or modify it under the
-terms of the MIT license. A copy of the license can be found in the file
-"LICENSE" at the root of this distribution.
------------------------------------------------------------------------------*/
-
-/* ----------------------------------------------------------------------------
-Concurrent bitmap that can set/reset sequences of bits atomically
----------------------------------------------------------------------------- */
-#pragma once
-#ifndef MI_XBITMAP_H
-#define MI_XBITMAP_H
-
-/* --------------------------------------------------------------------------------
- Definitions
--------------------------------------------------------------------------------- */
-
-typedef size_t mi_bfield_t;
-
-#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
-#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
-#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
-#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
-#define MI_BFIELD_LO_BIT8 ((~((mi_bfield_t)0))/0xFF) // 0x01010101 ..
-#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
-
-#define MI_BITMAP_CHUNK_BITS_SHIFT (8) // 2^8 = 256 bits per chunk
-#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
-#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
-#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
-
-typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
- _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
-} mi_bitmap_chunk_t;
-
-
-typedef mi_decl_align(32) struct mi_bitmap_s {
- mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
- _Atomic(mi_bfield_t) any_set;
-} mi_bitmap_t;
-
-#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
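
Spelled out: on a 64-bit target MI_BFIELD_BITS = 64, so a chunk holds 256/64 = 4 bfields and the bitmap holds 64 chunks, giving 64 x 256 = 16384 (16k) bits; on a 32-bit target MI_BFIELD_BITS = 32, a chunk still holds 256 bits (now 8 bfields) but there are only 32 chunks, giving 32 x 256 = 8192 (8k) bits, matching the comment above.
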
-
-/* --------------------------------------------------------------------------------
- Bitmap
--------------------------------------------------------------------------------- */
-
-typedef bool mi_bit_t;
-#define MI_BIT_SET (true)
-#define MI_BIT_CLEAR (false)
-
-// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
-void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);
-
-// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
-void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
-
-// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
-// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
-// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
-bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);
-
-// Is a sequence of n bits already all set/cleared?
-bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
-
-// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
-// and false otherwise leaving the bitmask as is.
-mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
-
-// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
-// and false otherwise leaving the bitmask as is.
-mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
-
-// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
-// and false otherwise leaving the bitmask as is.
-// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
-mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
-
-// Find a set bit in a bitmap and atomically unset it. Returns true on success,
-// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
- // The low `MI_BFIELD_BITS` bits of `start` are used to set the start point of the search
-// (to reduce thread contention).
-mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);
-
-// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
-// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
-mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx );
-
-// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
-// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
-mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx );
-
-#endif // MI_XBITMAP_H
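
For context on the API removed by this diff, a hedged usage sketch (signatures as declared in the header above; the free-slice framing and variable names are illustrative only, and it assumes the now-removed xbitmap.h declarations are in scope):

// One bit per free slice, e.g. inside an arena.
static mi_bitmap_t free_slices;

static void example(void) {
  mi_bitmap_init(&free_slices, false);                        // zero the bitmap
  mi_bitmap_unsafe_xsetN(MI_BIT_SET, &free_slices, 0, 1024);  // single-threaded setup: mark 1024 slices free

  size_t idx;
  if (mi_bitmap_try_find_and_clearN(&free_slices, 0 /*start hint*/, 8, &idx)) {
    // claimed slices [idx, idx+8) atomically; release them again:
    mi_bitmap_xsetN(MI_BIT_SET, &free_slices, idx, 8, NULL);
  }
}
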