mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-08 00:09:31 +03:00)
wip: further progress on removing segments
parent 71cfa45e76
commit 441d4fed9f
21 changed files with 2396 additions and 2492 deletions
@@ -55,6 +55,7 @@ set(mi_sources
    src/options.c
    src/os.c
    src/page.c
    src/page-map.c
    src/random.c
    src/segment.c
    src/segment-map.c

@@ -214,12 +214,7 @@
    </ClCompile>
    <ClCompile Include="..\..\src\alloc-posix.c" />
    <ClCompile Include="..\..\src\alloc.c" />
    <ClCompile Include="..\..\src\arena-abandoned.c">
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="..\..\src\arena.c" />
    <ClCompile Include="..\..\src\bitmap.c">
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
    </ClCompile>

@@ -232,6 +227,7 @@
    <ClCompile Include="..\..\src\heap.c" />
    <ClCompile Include="..\..\src\init.c" />
    <ClCompile Include="..\..\src\libc.c" />
    <ClCompile Include="..\..\src\page-map.c" />
    <ClCompile Include="..\..\src\prim\prim.c" />
    <ClCompile Include="..\..\src\prim\windows\prim.c">
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>

@@ -248,12 +244,8 @@
    </ClCompile>
    <ClCompile Include="..\..\src\page.c" />
    <ClCompile Include="..\..\src\random.c" />
    <ClCompile Include="..\..\src\segment-map.c" />
    <ClCompile Include="..\..\src\segment.c" />
    <ClCompile Include="..\..\src\os.c" />
    <ClCompile Include="..\..\src\stats.c" />
    <ClCompile Include="..\..\src\xarena.c" />
    <ClCompile Include="..\..\src\xbitmap.c" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h" />

@@ -43,12 +43,6 @@
    <ClCompile Include="..\..\src\random.c">
      <Filter>Sources</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\segment.c">
      <Filter>Sources</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\segment-map.c">
      <Filter>Sources</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\stats.c">
      <Filter>Sources</Filter>
    </ClCompile>

@@ -58,13 +52,10 @@
    <ClCompile Include="..\..\src\free.c">
      <Filter>Sources</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\arena-abandoned.c">
    <ClCompile Include="..\..\src\arena.c">
      <Filter>Sources</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\xbitmap.c">
      <Filter>Sources</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\xarena.c">
    <ClCompile Include="..\..\src\page-map.c">
      <Filter>Sources</Filter>
    </ClCompile>
  </ItemGroup>

@@ -100,6 +100,10 @@ typedef int32_t mi_ssize_t;
#define __BMI1__ 1
#endif

// Define big endian if needed
// #define MI_BIG_ENDIAN 1


/* --------------------------------------------------------------------------------
  Builtin's
-------------------------------------------------------------------------------- */

@@ -310,4 +314,6 @@ static inline size_t mi_rotl(size_t x, size_t r) {
  #endif
}


#endif // MI_BITS_H

@@ -108,6 +108,7 @@ size_t _mi_os_page_size(void);
size_t _mi_os_good_alloc_size(size_t size);
bool _mi_os_has_overcommit(void);
bool _mi_os_has_virtual_reserve(void);
size_t _mi_os_virtual_address_bits(void);

bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);

@@ -136,12 +137,11 @@ bool _mi_arena_contains(const void* p);
void _mi_arenas_collect(bool force_purge, mi_stats_t* stats);
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);

bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment);
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment);

void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid);
void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size);

/*
typedef struct mi_arena_field_cursor_s { // abstract struct
  size_t os_list_count; // max entries to visit in the OS abandoned list
  size_t start;         // start arena idx (may need to be wrapped)

@@ -154,27 +154,12 @@ typedef struct mi_arena_field_cursor_s { // abstract struct
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current);
*/

// "segment-map.c"
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);
// "page-map.c"
void _mi_page_map_register(mi_page_t* page);
void _mi_page_map_unregister(mi_page_t* page);

// "segment.c"
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size);

#if MI_HUGE_PAGE_ABANDON
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
#else
void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
#endif

void _mi_segments_collect(bool force, mi_segments_tld_t* tld);
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);
bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);

// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;

@@ -226,7 +211,7 @@ void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, siz
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
// void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);

// "libc.c"

@@ -338,8 +323,8 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {


// Align a pointer upwards
static inline void* mi_align_up_ptr(void* p, size_t alignment) {
  return (void*)_mi_align_up((uintptr_t)p, alignment);
static inline uint8_t* _mi_align_up_ptr(void* p, size_t alignment) {
  return (uint8_t*)_mi_align_up((uintptr_t)p, alignment);
}


@@ -445,68 +430,44 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
  return heap->pages_free_direct[idx];
}

// Segment that contains the pointer
// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
// therefore we align one byte before `p`.
// We check for NULL afterwards on 64-bit systems to improve codegen for `mi_free`.
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
  mi_segment_t* const segment = (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
  #if MI_INTPTR_SIZE <= 4
  return (p==NULL ? NULL : segment);
  #else
  return ((intptr_t)segment <= 0 ? NULL : segment);

extern signed char* _mi_page_map;

#define MI_PAGE_PTR_INVALID ((mi_page_t*)(1))

static inline mi_page_t* _mi_ptr_page(const void* p) {
  const uintptr_t up = ((uintptr_t)p) >> MI_ARENA_BLOCK_SHIFT;
  const ptrdiff_t ofs = _mi_page_map[up];
  #if MI_DEBUG
  if mi_unlikely(ofs==0) return MI_PAGE_PTR_INVALID;
  #endif
  return (mi_page_t*)((up + ofs - 1) << MI_ARENA_BLOCK_SHIFT);
}
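
// Worked example (illustrative, not part of the diff): on 64-bit, MI_ARENA_BLOCK_SHIFT
// is 16 (64 KiB blocks). For p == (void*)0x30008:
//   up  = 0x30008 >> 16 == 3            // 64 KiB block index of p
//   ofs = _mi_page_map[3]; with ofs == 1 the page starts in its own block:
//   (3 + 1 - 1) << 16 == 0x30000        // page start recovered from an interior pointer
// An offset of 0 marks a block with no registered page (checked in debug builds).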

// Segment belonging to a page
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
  mi_assert_internal(page!=NULL);
  mi_segment_t* segment = _mi_ptr_segment(page);
  mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]);
  return segment;
}

// used internally
static inline size_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) {
  // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
  ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
  mi_assert_internal(diff >= 0 && (size_t)diff <= MI_SEGMENT_SIZE /* for huge alignment it can be equal */);
  size_t idx = (size_t)diff >> segment->page_shift;
  mi_assert_internal(idx < segment->capacity);
  mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);
  return idx;
}

// Get the page containing the pointer
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
  size_t idx = _mi_segment_page_idx_of(segment, p);
  return &((mi_segment_t*)segment)->pages[idx];
}

// Quick page start for initialized pages
static inline uint8_t* mi_page_start(const mi_page_t* page) {
  mi_assert_internal(page->page_start != NULL);
  mi_assert_expensive(_mi_segment_page_start(_mi_page_segment(page),page,NULL) == page->page_start);
  return page->page_start;
}

// Get the page containing the pointer
static inline mi_page_t* _mi_ptr_page(void* p) {
  mi_assert_internal(p!=NULL);
  return _mi_segment_page_of(_mi_ptr_segment(p), p);
}

// Get the block size of a page (special case for huge objects)
// Get the block size of a page
static inline size_t mi_page_block_size(const mi_page_t* page) {
  mi_assert_internal(page->block_size > 0);
  return page->block_size;
}

static inline bool mi_page_is_huge(const mi_page_t* page) {
  mi_assert_internal((page->is_huge && _mi_page_segment(page)->page_kind == MI_PAGE_HUGE) ||
                     (!page->is_huge && _mi_page_segment(page)->page_kind != MI_PAGE_HUGE));
  return page->is_huge;
// Page start
static inline uint8_t* mi_page_start(const mi_page_t* page) {
  mi_assert(sizeof(mi_page_t) <= MI_PAGE_INFO_SIZE);
  return (uint8_t*)page + MI_PAGE_INFO_SIZE;
}

static inline uint8_t* mi_page_area(const mi_page_t* page, size_t* size) {
  if (size) { *size = mi_page_block_size(page) * page->reserved; }
  return mi_page_start(page);
}

static inline bool mi_page_is_in_arena(const mi_page_t* page) {
  return (page->memid.memkind == MI_MEM_ARENA);
}

static inline bool mi_page_is_singleton(const mi_page_t* page) {
  return (page->reserved == 1);
}

// Get the usable block size of a page without fixed padding.

@@ -515,11 +476,6 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
  return mi_page_block_size(page) - MI_PADDING_SIZE;
}

// size of a segment
static inline size_t mi_segment_size(mi_segment_t* segment) {
  return segment->segment_size;
}

// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
  return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);

@@ -534,10 +490,20 @@ static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
  return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
}

static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) {
  return mi_atomic_load_relaxed(&page->xthread_id);
}

static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
  mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
  mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
  if (heap != NULL) { page->heap_tag = heap->tag; }
  if (heap != NULL) {
    page->heap_tag = heap->tag;
    mi_atomic_store_release(&page->xthread_id, heap->thread_id);
  }
  else {
    mi_atomic_store_release(&page->xthread_id,0);
  }
}
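
// Sketch of how the fields above interact (illustrative, not part of the diff):
// clearing the heap abandons the page, which mi_page_is_abandoned (further below)
// detects through a zero thread id.
//
//   mi_page_set_heap(page, NULL);          // xheap = 0 and xthread_id = 0
//   assert(mi_page_thread_id(page) == 0);  // now reads as abandoned
//   mi_page_set_heap(page, heap);          // re-adopt: xthread_id = heap->thread_id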

// Thread free flag helpers

@@ -576,6 +542,21 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) {
  return (page->free != NULL);
}


// is the page not yet used up to its reserved space?
static inline bool mi_page_is_expandable(const mi_page_t* page) {
  mi_assert_internal(page != NULL);
  mi_assert_internal(page->capacity <= page->reserved);
  return (page->capacity < page->reserved);
}


static inline bool mi_page_is_full(mi_page_t* page) {
  bool full = (page->reserved == page->used);
  mi_assert_internal(!full || page->free == NULL);
  return full;
}

// is more than 7/8th of a page in use?
static inline bool mi_page_mostly_used(const mi_page_t* page) {
  if (page==NULL) return true;

@@ -583,6 +564,15 @@ static inline bool mi_page_mostly_used(const mi_page_t* page) {
  return (page->reserved - page->used <= frac);
}

static inline bool mi_page_is_abandoned(mi_page_t* page) {
  return (mi_page_thread_id(page) == 0);
}

static inline bool mi_page_is_huge(mi_page_t* page) {
  return (page->block_size > MI_LARGE_MAX_OBJ_SIZE);
}


static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
  return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
}

@@ -667,17 +657,8 @@ We also pass a separate `null` value to be used as `NULL` or otherwise
`(k2<<<k1)+k1` would appear (too) often as a sentinel value.
------------------------------------------------------------------- */

static inline bool mi_is_in_same_segment(const void* p, const void* q) {
  return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
}

static inline bool mi_is_in_same_page(const void* p, const void* q) {
  mi_segment_t* segmentp = _mi_ptr_segment(p);
  mi_segment_t* segmentq = _mi_ptr_segment(q);
  if (segmentp != segmentq) return false;
  size_t idxp = _mi_segment_page_idx_of(segmentp, p);
  size_t idxq = _mi_segment_page_idx_of(segmentq, q);
  return (idxp == idxq);
  return (_mi_ptr_page(p) == _mi_ptr_page(q));
}

static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {

@@ -693,7 +674,7 @@ static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const
static inline uint32_t mi_ptr_encode_canary(const void* null, const void* p, const uintptr_t* keys) {
  const uint32_t x = (uint32_t)(mi_ptr_encode(null,p,keys));
  // make the lowest byte 0 to prevent spurious read overflows which could be a security issue (issue #951)
  #ifdef MI_BIG_ENDIAN
  #if MI_BIG_ENDIAN
  return (x & 0x00FFFFFF);
  #else
  return (x & 0xFFFFFF00);

@@ -749,6 +730,20 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c
  #endif
}

/* -----------------------------------------------------------
  arena blocks
----------------------------------------------------------- */

// Blocks needed for a given byte size
static inline size_t mi_block_count_of_size(size_t size) {
  return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}

// Byte size of a number of blocks
static inline size_t mi_size_of_blocks(size_t bcount) {
  return (bcount * MI_ARENA_BLOCK_SIZE);
}
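
// Worked example (illustrative): with 64 KiB arena blocks, a request of 150 KiB
// needs mi_block_count_of_size(150*1024) == 3 blocks (rounded up), and
// mi_size_of_blocks(3) == 192 KiB, the inverse up to the rounding slack.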


/* -----------------------------------------------------------
  memory id's

@@ -111,40 +111,29 @@ terms of the MIT license. A copy of the license can be found in the file
// Main internal data-structures
// ------------------------------------------------------

// Main tuning parameters for segment and page sizes
// Sizes for 64-bit, divide by two for 32-bit
#ifndef MI_SMALL_PAGE_SHIFT
#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB
// Sizes are for 64-bit
#ifndef MI_ARENA_BLOCK_SHIFT
#ifdef MI_SMALL_PAGE_SHIFT // compatibility
#define MI_ARENA_BLOCK_SHIFT MI_SMALL_PAGE_SHIFT
#else
#define MI_ARENA_BLOCK_SHIFT (13 + MI_SIZE_SHIFT) // 64 KiB (32 KiB on 32-bit)
#endif
#ifndef MI_MEDIUM_PAGE_SHIFT
#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
#endif
#ifndef MI_LARGE_PAGE_SHIFT
#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4MiB
#endif
#ifndef MI_SEGMENT_SHIFT
#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4MiB -- must be equal to `MI_LARGE_PAGE_SHIFT`
#ifndef MI_BITMAP_CHUNK_BITS_SHIFT
#define MI_BITMAP_CHUNK_BITS_SHIFT 8 // optimized for 256 bits per chunk (avx2)
#endif

// Derived constants
#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_ALIGN (MI_SEGMENT_SIZE)
#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1))
#define MI_ARENA_BLOCK_SIZE (MI_ZU(1) << MI_ARENA_BLOCK_SHIFT)
#define MI_ARENA_BLOCK_ALIGN (MI_ARENA_BLOCK_SIZE)
#define MI_BITMAP_CHUNK_BITS (MI_ZU(1) << MI_BITMAP_CHUNK_BITS_SHIFT)

#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
#define MI_LARGE_PAGE_SIZE (MI_ZU(1)<<MI_LARGE_PAGE_SHIFT)
#define MI_ARENA_MIN_OBJ_SIZE MI_ARENA_BLOCK_SIZE
#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_CHUNK_BITS * MI_ARENA_BLOCK_SIZE) // for now, cannot cross chunk boundaries

#define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
#define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
#define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE
#define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bitmap)
#define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bitmap)
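
// Derivation of the new sizes on 64-bit (illustrative; MI_SIZE_SHIFT == 3, MI_SIZE_SIZE == 8):
//   MI_ARENA_BLOCK_SIZE = 1 << (13+3) = 64 KiB   (= small page = one bit in the bitmap)
//   MI_MEDIUM_PAGE_SIZE = 8 * 64 KiB  = 512 KiB  (= one byte in the bitmap)
//   MI_LARGE_PAGE_SIZE  = 8 * 512 KiB = 4 MiB    (= one word in the bitmap)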

// The max object size are checked to not waste more than 12.5% internally over the page sizes.
// (Except for large pages since huge objects are allocated in 4MiB chunks)
#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16KiB
#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB
#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2MiB
#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)

// Maximum number of size classes. (spaced exponentially in 12.5% increments)
#define MI_BIN_HUGE (73U)

@@ -152,19 +141,55 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_BIN_COUNT (MI_BIN_FULL+1)


#if (MI_LARGE_OBJ_WSIZE_MAX >= 655360)
#error "mimalloc internal: define more bins"
#endif

// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`)
#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX)

// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated orphan pages
#define MI_BLOCK_ALIGNMENT_MAX (MI_ARENA_BLOCK_ALIGN)

// We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX


// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------

// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
typedef enum mi_memkind_e {
  MI_MEM_NONE,      // not allocated
  MI_MEM_EXTERNAL,  // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
  MI_MEM_STATIC,    // allocated in a static area and should not be freed (for arena meta data for example)
  MI_MEM_OS,        // allocated from the OS
  MI_MEM_OS_HUGE,   // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
  MI_MEM_OS_REMAP,  // allocated in a remapable area (i.e. using `mremap`)
  MI_MEM_ARENA      // allocated from an arena (the usual case)
} mi_memkind_t;

static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
  return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}

typedef struct mi_memid_os_info {
  void* base;       // actual base address of the block (used for offset aligned allocations)
  size_t alignment; // alignment at allocation
} mi_memid_os_info_t;

typedef struct mi_memid_arena_info {
  size_t block_index; // index in the arena
  mi_arena_id_t id;   // arena id (>= 1)
  bool is_exclusive;  // this arena can only be used for specific arena allocations
} mi_memid_arena_info_t;

typedef struct mi_memid_s {
  union {
    mi_memid_os_info_t os;       // only used for MI_MEM_OS
    mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
  } mem;
  bool is_pinned;           // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages)
  bool initially_committed; // `true` if the memory was originally allocated as committed
  bool initially_zero;      // `true` if the memory was originally zero initialized
  mi_memkind_t memkind;
} mi_memid_t;


// ------------------------------------------------------
// Mimalloc pages contain allocated blocks
// ------------------------------------------------------

@@ -223,6 +248,10 @@ typedef union mi_page_flags_s {
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
typedef uintptr_t mi_thread_free_t;

// Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython)
typedef struct mi_subproc_s mi_subproc_t;


// A page contains blocks of one specific size (`block_size`).
// Each page has three list of free blocks:
// `free` for blocks that can be allocated,

@@ -242,8 +271,6 @@ typedef uintptr_t mi_thread_free_t;
// Notes:
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Using `uint16_t` does not seem to slow things down
// - The size is 10 words on 64-bit which helps the page index calculations
//   (and 12 words on 32-bit, and encoded free lists add 2 words)
// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
//   concurrent frees where only the first concurrent free adds to the owning
//   heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).

@@ -252,15 +279,8 @@ typedef uintptr_t mi_thread_free_t;
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
  // "owned" by the segment
  uint8_t segment_idx;       // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
  uint8_t segment_in_use:1;  // `true` if the segment allocated this page
  uint8_t is_committed:1;    // `true` if the page virtual memory is committed
  uint8_t is_zero_init:1;    // `true` if the page was initially zero initialized
  uint8_t is_huge:1;         // `true` if the page is in a huge segment

  // layout like this to optimize access in `mi_malloc` and `mi_free`
  uint16_t capacity;         // number of blocks committed, must be the first field, see `segment.c:page_clear`
  mi_memid_t memid;          // provenance of the page memory
  uint16_t capacity;         // number of blocks committed (must be the first field for proper zero-initialisation)
  uint16_t reserved;         // number of blocks reserved in memory
  mi_page_flags_t flags;     // `in_full` and `has_aligned` flags (8 bits)
  uint8_t free_is_zero:1;    // `true` if the blocks in the free list are zero initialized

@@ -273,119 +293,53 @@ typedef struct mi_page_s {
  uint8_t heap_tag;          // tag of the owning heap, used to separate heaps by object type
  // padding
  size_t block_size;         // size available in each block (always `>0`)
  uint8_t* page_start;       // start of the page area containing the blocks

  #if (MI_ENCODE_FREELIST || MI_PADDING)
  uintptr_t keys[2];         // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
  #endif

  _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
  _Atomic(uintptr_t) xheap;
  _Atomic(uintptr_t) xheap;               // heap this threads belong to.
  _Atomic(mi_threadid_t) xthread_id;      // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned)

  struct mi_page_s* next;    // next page owned by the heap with the same `block_size`
  struct mi_page_s* prev;    // previous page owned by the heap with the same `block_size`

  #if MI_INTPTR_SIZE==4      // pad to 12 words on 32-bit
  void* padding[1];
  #endif
} mi_page_t;


// ------------------------------------------------------
// Object sizes
// ------------------------------------------------------

#define MI_PAGE_ALIGN (64)
#define MI_PAGE_INFO_SIZE (MI_SIZE_SHIFT*MI_PAGE_ALIGN) // should be > sizeof(mi_page_t)

// The max object size are checked to not waste more than 12.5% internally over the page sizes.
// (Except for large pages since huge objects are allocated in 4MiB chunks)
#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4) // ~16KiB
#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4) // ~128KiB
#define MI_LARGE_MAX_OBJ_SIZE ((MI_LARGE_PAGE_SIZE-MI_PAGE_INFO_SIZE)/2) // ~2MiB
#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)
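
// Worked out on 64-bit (illustrative): MI_PAGE_INFO_SIZE = 3*64 = 192 bytes, so
//   MI_SMALL_MAX_OBJ_SIZE  = (64 KiB  - 192)/4 = 16336   (~16 KiB)
//   MI_MEDIUM_MAX_OBJ_SIZE = (512 KiB - 192)/4 = 131024  (~128 KiB)
//   MI_LARGE_MAX_OBJ_SIZE  = (4 MiB   - 192)/2 = 2097056 (~2 MiB)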


#if (MI_LARGE_MAX_OBJ_WSIZE >= 655360)
#error "mimalloc internal: define more bins"
#endif


// ------------------------------------------------------
// Mimalloc segments contain mimalloc pages
// Page kinds
// ------------------------------------------------------

typedef enum mi_page_kind_e {
  MI_PAGE_SMALL,    // small blocks go into 64KiB pages inside a segment
  MI_PAGE_MEDIUM,   // medium blocks go into 512KiB pages inside a segment
  MI_PAGE_LARGE,    // larger blocks go into a single page spanning a whole segment
  MI_PAGE_HUGE      // a huge page is a single page in a segment of variable size (but still 2MiB aligned)
  MI_PAGE_SMALL,    // small blocks go into 64KiB pages
  MI_PAGE_MEDIUM,   // medium blocks go into 512KiB pages
  MI_PAGE_LARGE,    // larger blocks go into 4MiB pages
  MI_PAGE_SINGLETON // page containing a single block.
                    // used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an aligment `> MI_BLOCK_ALIGNMENT_MAX`.
} mi_page_kind_t;


// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------

// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
typedef enum mi_memkind_e {
  MI_MEM_NONE,      // not allocated
  MI_MEM_EXTERNAL,  // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
  MI_MEM_STATIC,    // allocated in a static area and should not be freed (for arena meta data for example)
  MI_MEM_OS,        // allocated from the OS
  MI_MEM_OS_HUGE,   // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
  MI_MEM_OS_REMAP,  // allocated in a remapable area (i.e. using `mremap`)
  MI_MEM_ARENA      // allocated from an arena (the usual case)
} mi_memkind_t;

static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
  return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}

typedef struct mi_memid_os_info {
  void* base;       // actual base address of the block (used for offset aligned allocations)
  size_t alignment; // alignment at allocation
} mi_memid_os_info_t;

typedef struct mi_memid_arena_info {
  size_t block_index; // index in the arena
  mi_arena_id_t id;   // arena id (>= 1)
  bool is_exclusive;  // this arena can only be used for specific arena allocations
} mi_memid_arena_info_t;

typedef struct mi_memid_s {
  union {
    mi_memid_os_info_t os;       // only used for MI_MEM_OS
    mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
  } mem;
  bool is_pinned;           // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages)
  bool initially_committed; // `true` if the memory was originally allocated as committed
  bool initially_zero;      // `true` if the memory was originally zero initialized
  mi_memkind_t memkind;
} mi_memid_t;


// ---------------------------------------------------------------
// Segments contain mimalloc pages
// ---------------------------------------------------------------
typedef struct mi_subproc_s mi_subproc_t;

// Segments are large allocated memory blocks (2MiB on 64 bit) from the OS.
// Inside segments we allocated fixed size _pages_ that contain blocks.
typedef struct mi_segment_s {
  // constant fields
  mi_memid_t memid;          // memory id to track provenance
  bool allow_decommit;
  bool allow_purge;
  size_t segment_size;       // for huge pages this may be different from `MI_SEGMENT_SIZE`
  mi_subproc_t* subproc;     // segment belongs to sub process

  // segment fields
  struct mi_segment_s* next; // must be the first (non-constant) segment field -- see `segment.c:segment_init`
  struct mi_segment_s* prev;
  bool was_reclaimed;        // true if it was reclaimed (used to limit reclaim-on-free reclamation)
  bool dont_free;            // can be temporarily true to ensure the segment is not freed

  size_t abandoned;          // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
  size_t abandoned_visits;   // count how often this segment is visited for reclaiming (to force reclaim if it is too long)

  size_t used;               // count of pages in use (`used <= capacity`)
  size_t capacity;           // count of available pages (`#free + used`)
  size_t segment_info_size;  // space we are using from the first page for segment meta-data and possible guard pages.
  uintptr_t cookie;          // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`

  struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arena's, and only if `mi_option_visit_abandoned` is enabled
  struct mi_segment_s* abandoned_os_prev;

  // layout like this to optimize access in `mi_free`
  _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
  size_t page_shift;         // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
  mi_page_kind_t page_kind;  // kind of pages: small, medium, large, or huge
  mi_page_t pages[1];        // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
} mi_segment_t;


// ------------------------------------------------------
// Heaps

@@ -522,21 +476,18 @@ typedef struct mi_stat_counter_s {
} mi_stat_counter_t;

typedef struct mi_stats_s {
  mi_stat_count_t segments;
  mi_stat_count_t pages;
  mi_stat_count_t reserved;
  mi_stat_count_t committed;
  mi_stat_count_t reset;
  mi_stat_count_t purged;
  mi_stat_count_t page_committed;
  mi_stat_count_t segments_abandoned;
  mi_stat_count_t pages_abandoned;
  mi_stat_count_t threads;
  mi_stat_count_t normal;
  mi_stat_count_t huge;
  mi_stat_count_t giant;
  mi_stat_count_t malloc;
  mi_stat_count_t segments_cache;
  mi_stat_counter_t pages_extended;
  mi_stat_counter_t mmap_calls;
  mi_stat_counter_t commit_calls;

@@ -581,12 +532,12 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
// ------------------------------------------------------

struct mi_subproc_s {
  _Atomic(size_t) abandoned_count;         // count of abandoned segments for this sub-process
  _Atomic(size_t) abandoned_os_list_count; // count of abandoned segments in the os-list
  mi_lock_t abandoned_os_lock;             // lock for the abandoned os segment list (outside of arena's) (this lock protect list operations)
  _Atomic(size_t) abandoned_count;         // count of abandoned pages for this sub-process
  _Atomic(size_t) abandoned_os_list_count; // count of abandoned pages in the os-list
  mi_lock_t abandoned_os_lock;             // lock for the abandoned os pages list (outside of arena's) (this lock protect list operations)
  mi_lock_t abandoned_os_visit_lock;       // ensure only one thread per subproc visits the abandoned os list
  mi_segment_t* abandoned_os_list;         // doubly-linked list of abandoned segments outside of arena's (in OS allocated memory)
  mi_segment_t* abandoned_os_list_tail;    // the tail-end of the list
  mi_page_t* abandoned_os_list;            // doubly-linked list of abandoned pages outside of arena's (in OS allocated memory)
  mi_page_t* abandoned_os_list_tail;       // the tail-end of the list
  mi_memid_t memid;                        // provenance of this memory block
};

@@ -597,11 +548,6 @@ struct mi_subproc_s {
// Milliseconds as in `int64_t` to avoid overflows
typedef int64_t mi_msecs_t;

// Queue of segments
typedef struct mi_segment_queue_s {
  mi_segment_t* first;
  mi_segment_t* last;
} mi_segment_queue_t;

// OS thread local data
typedef struct mi_os_tld_s {

@@ -609,28 +555,13 @@ typedef struct mi_os_tld_s {
  mi_stats_t* stats; // points to tld stats
} mi_os_tld_t;

// Segments thread local data
typedef struct mi_segments_tld_s {
  mi_segment_queue_t small_free;  // queue of segments with free small pages
  mi_segment_queue_t medium_free; // queue of segments with free medium pages
  mi_page_queue_t pages_purge;    // queue of freed pages that are delay purged
  size_t count;                   // current number of segments;
  size_t peak_count;              // peak number of segments
  size_t current_size;            // current size of all segments
  size_t peak_size;               // peak size of all segments
  size_t reclaim_count;           // number of reclaimed (abandoned) segments
  mi_subproc_t* subproc;          // sub-process this thread belongs to.
  mi_stats_t* stats;              // points to tld stats
  mi_os_tld_t* os;                // points to os tld
} mi_segments_tld_t;

// Thread local data
struct mi_tld_s {
  unsigned long long heartbeat;   // monotonic heartbeat count
  bool recurse;                   // true if deferred was called; used to prevent infinite recursion.
  mi_heap_t* heap_backing;        // backing heap of this thread (cannot be deleted)
  mi_heap_t* heaps;               // list of heaps in this thread (so we can abandon all when the thread terminates)
  mi_segments_tld_t segments;     // segment tld
  mi_subproc_t* subproc;          // sub-process this thread belongs to.
  mi_os_tld_t os;                 // os tld
  mi_stats_t stats;               // statistics
};

@@ -82,7 +82,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_

  #if (MI_STAT>0)
  const size_t bsize = mi_page_usable_block_size(page);
  if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
  if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
    mi_heap_stat_increase(heap, normal, bsize);
    mi_heap_stat_counter_increase(heap, normal_count, 1);
    #if (MI_STAT>1)

src/arena.c (869 changes): file diff suppressed because it is too large.

src/bitmap-old.c (new file, 419 lines):

@@ -0,0 +1,419 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)

There are two api's; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).

The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
---------------------------------------------------------------------------- */

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/bits.h"
#include "bitmap.h"

/* -----------------------------------------------------------
  Bitmap definition
----------------------------------------------------------- */

// The bit mask for a given number of blocks at a specified bit index.
static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
  mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
  mi_assert_internal(count > 0);
  if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
  if (count == 0) return 0;
  return ((((size_t)1 << count) - 1) << bitidx);
}
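
// Worked example (illustrative): mi_bitmap_mask_(3, 5) == ((1<<3)-1) << 5 == 0xE0,
// i.e. a run of 3 one-bits starting at bit index 5 within a single field.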



/* -----------------------------------------------------------
  Claim a bit sequence atomically
----------------------------------------------------------- */

// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
  mi_assert_internal(bitmap_idx != NULL);
  mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t map = mi_atomic_load_relaxed(field);
  if (map==MI_BITMAP_FIELD_FULL) return false; // short cut

  // search for 0-bit sequence of length count
  const size_t mask = mi_bitmap_mask_(count, 0);
  const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;

  #if MI_HAS_FAST_BITSCAN
  size_t bitidx = mi_ctz(~map);  // quickly find the first zero bit if possible
  #else
  size_t bitidx = 0;             // otherwise start at 0
  #endif
  size_t m = (mask << bitidx);   // invariant: m == mask shifted by bitidx

  // scan linearly for a free range of zero bits
  while (bitidx <= bitidx_max) {
    const size_t mapm = (map & m);
    if (mapm == 0) {  // are the mask bits free at bitidx?
      mi_assert_internal((m >> bitidx) == mask); // no overflow?
      const size_t newmap = (map | m);
      mi_assert_internal((newmap^map) >> bitidx == mask);
      if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
        // no success, another thread claimed concurrently.. keep going (with updated `map`)
        continue;
      }
      else {
        // success, we claimed the bits!
        *bitmap_idx = mi_bitmap_index_create(idx, bitidx);
        return true;
      }
    }
    else {
      // on to the next bit range
      #if MI_HAS_FAST_BITSCAN
      mi_assert_internal(mapm != 0);
      const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
      mi_assert_internal(shift > 0 && shift <= count);
      #else
      const size_t shift = 1;
      #endif
      bitidx += shift;
      m <<= shift;
    }
  }
  // no bits found
  return false;
}


// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
  size_t idx = start_field_idx;
  for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
    if (idx >= bitmap_fields) { idx = 0; } // wrap
    if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
      return true;
    }
  }
  return false;
}
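
// Usage sketch (illustrative; assumes `mi_bitmap_t` is a pointer to an array of
// `mi_bitmap_field_t` fields whose storage the caller owns):
//
//   mi_bitmap_field_t fields[4] = { 0 };   // 4 fields = 256 bits on 64-bit
//   mi_bitmap_index_t idx;
//   if (_mi_bitmap_try_find_from_claim(fields, 4, 0, 8, &idx)) {
//     // ... use the 8 claimed bits at `idx` ...
//     _mi_bitmap_unclaim(fields, 4, 8, idx);  // release them again
//   }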


// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  // mi_assert_internal((bitmap[idx] & mask) == mask);
  const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
  return ((prev & mask) == mask);
}


// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
  size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
  if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
  return ((prev & mask) == 0);
}

// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
  if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
  return ((field & mask) == mask);
}

// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
  do {
    if ((expected & mask) != 0) return false;
  }
  while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
  mi_assert_internal((expected & mask) == 0);
  return true;
}


bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}

bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  bool any_ones;
  mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
  return any_ones;
}


//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------

// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
{
  mi_assert_internal(bitmap_idx != NULL);

  // check initial trailing zeros
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t map = mi_atomic_load_relaxed(field);
  const size_t initial = mi_clz(map); // count of initial zeros starting at idx
  mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
  if (initial == 0) return false;
  if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
  if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries

  // scan ahead
  size_t found = initial;
  size_t mask = 0; // mask bits for the final field
  while(found < count) {
    field++;
    map = mi_atomic_load_relaxed(field);
    const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
    mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
    mask = mi_bitmap_mask_(mask_bits, 0);
    if ((map & mask) != 0) return false; // some part is already claimed
    found += mask_bits;
  }
  mi_assert_internal(field < &bitmap[bitmap_fields]);

  // we found a range of contiguous zeros up to the final field; mask contains mask in the final field
  // now try to claim the range atomically
  mi_bitmap_field_t* const final_field = field;
  const size_t final_mask = mask;
  mi_bitmap_field_t* const initial_field = &bitmap[idx];
  const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
  const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);

  // initial field
  size_t newmap;
  field = initial_field;
  map = mi_atomic_load_relaxed(field);
  do {
    newmap = (map | initial_mask);
    if ((map & initial_mask) != 0) { goto rollback; };
  } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));

  // intermediate fields
  while (++field < final_field) {
    newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
    map = 0;
    if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
  }

  // final field
  mi_assert_internal(field == final_field);
  map = mi_atomic_load_relaxed(field);
  do {
    newmap = (map | final_mask);
    if ((map & final_mask) != 0) { goto rollback; }
  } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));

  // claimed!
  mi_stat_counter_increase(stats->arena_crossover_count,1);
  *bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
  return true;

rollback:
  // roll back intermediate fields
  // (we just failed to claim `field` so decrement first)
  while (--field > initial_field) {
    newmap = 0;
    map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
    mi_assert_internal(mi_atomic_load_relaxed(field) == map);
    mi_atomic_store_release(field, newmap);
  }
  if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
    map = mi_atomic_load_relaxed(field);
    do {
      mi_assert_internal((map & initial_mask) == initial_mask);
      newmap = (map & ~initial_mask);
    } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
  }
  mi_stat_counter_increase(stats->arena_rollback_count,1);
  // retry? (we make a recursive call instead of goto to be able to use const declarations)
  if (retries <= 2) {
    return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
  }
  else {
    return false;
  }
}


// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
  mi_assert_internal(count > 0);
  if (count <= 2) {
    // we don't bother with crossover fields for small counts
    return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
  }

  // visit the fields
  size_t idx = start_field_idx;
  for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
    if (idx >= bitmap_fields) { idx = 0; } // wrap
    // first try to claim inside a field
    /*
    if (count <= MI_BITMAP_FIELD_BITS) {
      if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
        return true;
      }
    }
    */
    // if that fails, then try to claim across fields
    if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
      return true;
    }
  }
  return false;
}

// Helper for masks across fields; returns the mid count, post_mask may be 0
static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
  MI_UNUSED(bitmap_fields);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) {
    *pre_mask = mi_bitmap_mask_(count, bitidx);
    *mid_mask = 0;
    *post_mask = 0;
    mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields);
    return 0;
  }
  else {
    const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx;
    mi_assert_internal(pre_bits < count);
    *pre_mask = mi_bitmap_mask_(pre_bits, bitidx);
    count -= pre_bits;
    const size_t mid_count = (count / MI_BITMAP_FIELD_BITS);
    *mid_mask = MI_BITMAP_FIELD_FULL;
    count %= MI_BITMAP_FIELD_BITS;
    *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0));
    mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields);
    return mid_count;
  }
}
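
// Worked example (illustrative, 64-bit fields): a run of count == 100 bits starting
// at bit 60 of field 0 splits into pre_mask covering 4 bits (60..63 of field 0),
// mid_count == 1 full field (bits 0..63 of field 1), and post_mask covering 32 bits
// (0..31 of field 2): 4 + 64 + 32 == 100.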
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 0 atomically
|
||||
// Returns `true` if all `count` bits were 1 previously.
|
||||
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
size_t pre_mask;
|
||||
size_t mid_mask;
|
||||
size_t post_mask;
|
||||
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
|
||||
bool all_one = true;
|
||||
mi_bitmap_field_t* field = &bitmap[idx];
|
||||
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
|
||||
if ((prev & pre_mask) != pre_mask) all_one = false;
|
||||
while(mid_count-- > 0) {
|
||||
prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
|
||||
if ((prev & mid_mask) != mid_mask) all_one = false;
|
||||
}
|
||||
if (post_mask!=0) {
|
||||
prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
|
||||
if ((prev & post_mask) != post_mask) all_one = false;
|
||||
}
|
||||
return all_one;
|
||||
}
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
  size_t idx = mi_bitmap_index_field(bitmap_idx);
  size_t pre_mask;
  size_t mid_mask;
  size_t post_mask;
  size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
  bool all_zero = true;
  bool any_zero = false;
  _Atomic(size_t)*field = &bitmap[idx];
  size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
  if ((prev & pre_mask) != 0) all_zero = false;
  if ((prev & pre_mask) != pre_mask) any_zero = true;
  while (mid_count-- > 0) {
    prev = mi_atomic_or_acq_rel(field++, mid_mask);
    if ((prev & mid_mask) != 0) all_zero = false;
    if ((prev & mid_mask) != mid_mask) any_zero = true;
  }
  if (post_mask!=0) {
    prev = mi_atomic_or_acq_rel(field, post_mask);
    if ((prev & post_mask) != 0) all_zero = false;
    if ((prev & post_mask) != post_mask) any_zero = true;
  }
  if (pany_zero != NULL) { *pany_zero = any_zero; }
  return all_zero;
}
// Returns `true` if all `count` bits were 1.
// `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
  size_t idx = mi_bitmap_index_field(bitmap_idx);
  size_t pre_mask;
  size_t mid_mask;
  size_t post_mask;
  size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
  bool all_ones = true;
  bool any_ones = false;
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t prev = mi_atomic_load_relaxed(field++);
  if ((prev & pre_mask) != pre_mask) all_ones = false;
  if ((prev & pre_mask) != 0) any_ones = true;
  while (mid_count-- > 0) {
    prev = mi_atomic_load_relaxed(field++);
    if ((prev & mid_mask) != mid_mask) all_ones = false;
    if ((prev & mid_mask) != 0) any_ones = true;
  }
  if (post_mask!=0) {
    prev = mi_atomic_load_relaxed(field);
    if ((prev & post_mask) != post_mask) all_ones = false;
    if ((prev & post_mask) != 0) any_ones = true;
  }
  if (pany_ones != NULL) { *pany_ones = any_ones; }
  return all_ones;
}

bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}

bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  bool any_ones;
  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
  return any_ones;
}
110	src/bitmap-old.h	Normal file
@@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`).

There are two APIs: the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS);
this is used in region allocation.

The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H

/* -----------------------------------------------------------
  Bitmap definition
----------------------------------------------------------- */

#define MI_BITMAP_FIELD_BITS   (8*MI_SIZE_SIZE)
#define MI_BITMAP_FIELD_FULL   (~((size_t)0))   // all bits set

// An atomic bitmap of `size_t` fields
typedef _Atomic(size_t)  mi_bitmap_field_t;
typedef mi_bitmap_field_t*  mi_bitmap_t;

// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
  mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
  return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
  mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
  return mi_bitmap_index_create_ex(idx,bitidx);
}

// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
  return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}

// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
  return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}

// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
  return bitmap_idx;
}
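As a quick sanity check of these helpers, an illustrative sketch (editor's addition, not part of this change; assumes 64-bit `size_t`, so MI_BITMAP_FIELD_BITS is 64):

  // Illustrative only: bit 70 of the bitmap lives in field 1 at in-field bit 6,
  // and the helpers compose and decompose losslessly.
  static inline void mi_bitmap_index_example(void) {
    mi_bitmap_index_t bi = mi_bitmap_index_create(1, 6);   // field 1, bit 6
    mi_assert_internal(mi_bitmap_index_bit(bi) == 70);
    mi_assert_internal(mi_bitmap_index_field(bi) == 1);
    mi_assert_internal(mi_bitmap_index_bit_in_field(bi) == 6);
  }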
/* -----------------------------------------------------------
  Claim a bit sequence atomically
----------------------------------------------------------- */

// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);

// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);

// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` on success, which is only when all of the `count` bits were 0 previously.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);

bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);


//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------

// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);

// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);

bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

#endif
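An illustrative sketch of the typical claim/release pattern over this API (editor's addition, not part of this change; `bitmap`, `nfields`, and `stats` are assumed to be set up by the caller, as arena allocation does):

  //   mi_bitmap_index_t bi;
  //   if (_mi_bitmap_try_find_from_claim_across(bitmap, nfields, 0, 70, &bi, stats)) {
  //     // ... use the 70 claimed bits starting at mi_bitmap_index_bit(bi) ...
  //     _mi_bitmap_unclaim_across(bitmap, nfields, 70, bi);  // release them again
  //   }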
868	src/bitmap.c
@@ -1,19 +1,12 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */

#include "mimalloc.h"
@@ -21,399 +14,586 @@ between the fields. (This is used in arena allocation)
#include "mimalloc/bits.h"
#include "bitmap.h"

/* -----------------------------------------------------------
  Bitmap definition
----------------------------------------------------------- */

// The bit mask for a given number of blocks at a specified bit index.
static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
  mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
  mi_assert_internal(count > 0);
  if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
  if (count == 0) return 0;
  return ((((size_t)1 << count) - 1) << bitidx);
}

/* --------------------------------------------------------------------------------
  bfields
-------------------------------------------------------------------------------- */

static inline size_t mi_bfield_ctz(mi_bfield_t x) {
  return mi_ctz(x);
}

static inline size_t mi_bfield_clz(mi_bfield_t x) {
  return mi_clz(x);
}

// find the least significant bit that is set (i.e. count trailing zero's)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
  return mi_bsf(x,idx);
}

static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
  return mi_rotr(x,r);
}
/* -----------------------------------------------------------
  Claim a bit sequence atomically
----------------------------------------------------------- */

// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
  mi_assert_internal(bitmap_idx != NULL);
  mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t map = mi_atomic_load_relaxed(field);
  if (map==MI_BITMAP_FIELD_FULL) return false; // short cut

  // search for 0-bit sequence of length count
  const size_t mask = mi_bitmap_mask_(count, 0);
  const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;

#if MI_HAS_FAST_BITSCAN
  size_t bitidx = mi_ctz(~map);  // quickly find the first zero bit if possible
#else
  size_t bitidx = 0;             // otherwise start at 0
#endif
  size_t m = (mask << bitidx);   // invariant: m == mask shifted by bitidx

  // scan linearly for a free range of zero bits
  while (bitidx <= bitidx_max) {
    const size_t mapm = (map & m);
    if (mapm == 0) {  // are the mask bits free at bitidx?
      mi_assert_internal((m >> bitidx) == mask); // no overflow?
      const size_t newmap = (map | m);
      mi_assert_internal((newmap^map) >> bitidx == mask);
      if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) {  // TODO: use weak cas here?
        // no success, another thread claimed concurrently.. keep going (with updated `map`)
        continue;
      }
      else {
        // success, we claimed the bits!
        *bitmap_idx = mi_bitmap_index_create(idx, bitidx);
        return true;
      }
    }
    else {
      // on to the next bit range
#if MI_HAS_FAST_BITSCAN
      mi_assert_internal(mapm != 0);
      const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
      mi_assert_internal(shift > 0 && shift <= count);
#else
      const size_t shift = 1;
#endif
      bitidx += shift;
      m <<= shift;
    }
  }
  // no bits found
  return false;
}

// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
  mi_assert_internal(idx < MI_BFIELD_BITS);
  const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
  if (set) {
    const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
    return ((old&mask) == 0);
  }
  else {
    mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
    return ((old&mask) == mask);
  }
}

// Set/clear a mask set of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's).
// `already_xset` is true if all bits for the mask were already set/cleared.
static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
  mi_assert_internal(mask != 0);
  if (set) {
    mi_bfield_t old = *b;
    while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask));  // try to atomically set the mask bits until success
    *already_xset = ((old&mask) == mask);
    return ((old&mask) == 0);
  }
  else { // clear
    mi_bfield_t old = *b;
    while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask));  // try to atomically clear the mask bits until success
    *already_xset = ((old&mask) == 0);
    return ((old&mask) == mask);
  }
}

// Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
  mi_assert_internal(idx < MI_BFIELD_BITS);
  // for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first
  return mi_bfield_atomic_xset(set, b, idx);
}
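An editor's note on the semantics (illustrative, not part of this change): because `mi_bfield_atomic_xset` reports the transition, it doubles as a test-and-set, so if two threads set the same bit, exactly one of them observes the 0-to-1 transition.

  //   _Atomic(mi_bfield_t) f = 0;
  //   bool first  = mi_bfield_atomic_xset(MI_BIT_SET, &f, 3);  // true: bit 3 was 0
  //   bool second = mi_bfield_atomic_xset(MI_BIT_SET, &f, 3);  // false: already set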
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
  size_t idx = start_field_idx;
  for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
    if (idx >= bitmap_fields) { idx = 0; } // wrap
    if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
      return true;
    }
  }
  return false;
}
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  // mi_assert_internal((bitmap[idx] & mask) == mask);
  const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
  return ((prev & mask) == mask);
}

// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
  size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
  if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
  return ((prev & mask) == 0);
}

// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
  if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
  return ((field & mask) == mask);
}

// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` on success, which is only when all of the `count` bits were 0 previously.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
  do {
    if ((expected & mask) != 0) return false;
  }
  while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
  mi_assert_internal((expected & mask) == 0);
  return true;
}

bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}

bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  bool any_ones;
  mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
  return any_ones;
}

// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
  mi_assert_internal(mask != 0);
  if (set) {
    mi_bfield_t old = *b;
    do {
      if ((old&mask) != 0) return false; // the mask bits are no longer 0
    } while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask));  // try to atomically set the mask bits
    return true;
  }
  else { // clear
    mi_bfield_t old = *b;
    do {
      if ((old&mask) != mask) return false; // the mask bits are no longer set
    } while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask));  // try to atomically clear the mask bits
    return true;
  }
}

// Check if all bits corresponding to a mask are set/cleared.
static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
  mi_assert_internal(mask != 0);
  if (set) {
    return ((*b & mask) == mask);
  }
  else {
    return ((*b & mask) == 0);
  }
}

// Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)* b, size_t byte_idx ) {
  mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
  const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
  return mi_bfield_atomic_try_xset_mask(set,b,mask);
}
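The contrast between the two mask variants, as an illustrative sketch (editor's addition, not part of this change): `..._xset_mask` always writes and merely reports the transition, while `..._try_xset_mask` only succeeds when the whole mask can transition and otherwise leaves the field untouched.

  //   _Atomic(mi_bfield_t) f = 0x0F;
  //   mi_bfield_atomic_try_xset_mask(MI_BIT_SET, &f, 0xFF);       // false: low 4 bits were already set, f stays 0x0F
  //   bool already;
  //   mi_bfield_atomic_xset_mask(MI_BIT_SET, &f, 0xFF, &already); // sets f to 0xFF anyway; returns false (not all were 0)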
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------

// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
{
  mi_assert_internal(bitmap_idx != NULL);

  // check initial trailing zeros
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t map = mi_atomic_load_relaxed(field);
  const size_t initial = mi_clz(map); // count of initial zeros starting at idx
  mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
  if (initial == 0) return false;
  if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx);    // no need to cross fields (this case won't happen for us)
  if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries

  // scan ahead
  size_t found = initial;
  size_t mask = 0;  // mask bits for the final field
  while(found < count) {
    field++;
    map = mi_atomic_load_relaxed(field);
    const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
    mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
    mask = mi_bitmap_mask_(mask_bits, 0);
    if ((map & mask) != 0) return false; // some part is already claimed
    found += mask_bits;
  }
  mi_assert_internal(field < &bitmap[bitmap_fields]);

  // we found a range of contiguous zeros up to the final field; mask contains mask in the final field
  // now try to claim the range atomically
  mi_bitmap_field_t* const final_field = field;
  const size_t final_mask = mask;
  mi_bitmap_field_t* const initial_field = &bitmap[idx];
  const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
  const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);

  // initial field
  size_t newmap;
  field = initial_field;
  map = mi_atomic_load_relaxed(field);
  do {
    newmap = (map | initial_mask);
    if ((map & initial_mask) != 0) { goto rollback; };
  } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));

  // intermediate fields
  while (++field < final_field) {
    newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
    map = 0;
    if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
  }

  // final field
  mi_assert_internal(field == final_field);
  map = mi_atomic_load_relaxed(field);
  do {
    newmap = (map | final_mask);
    if ((map & final_mask) != 0) { goto rollback; }
  } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));

  // claimed!
  mi_stat_counter_increase(stats->arena_crossover_count,1);
  *bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
  return true;

rollback:
  // roll back intermediate fields
  // (we just failed to claim `field` so decrement first)
  while (--field > initial_field) {
    newmap = 0;
    map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
    mi_assert_internal(mi_atomic_load_relaxed(field) == map);
    mi_atomic_store_release(field, newmap);
  }
  if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
    map = mi_atomic_load_relaxed(field);
    do {
      mi_assert_internal((map & initial_mask) == initial_mask);
      newmap = (map & ~initial_mask);
    } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
  }
  mi_stat_counter_increase(stats->arena_rollback_count,1);
  // retry? (we make a recursive call instead of goto to be able to use const declarations)
  if (retries <= 2) {
    return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
  }
  else {
    return false;
  }
}

/* --------------------------------------------------------------------------------
 bitmap chunks
-------------------------------------------------------------------------------- */

static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
  mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
  const size_t i = cidx / MI_BFIELD_BITS;
  const size_t idx = cidx % MI_BFIELD_BITS;
  return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
}

static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
  mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
  const size_t i = byte_idx / MI_BFIELD_SIZE;
  const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
  return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
}

// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
  mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(n>0);
  bool all_transition = true;
  bool all_already_xset = true;
  size_t idx = cidx % MI_BFIELD_BITS;
  size_t field = cidx / MI_BFIELD_BITS;
  while (n > 0) {
    size_t m = MI_BFIELD_BITS - idx;   // m is the bits to xset in this field
    if (m > n) { m = n; }
    mi_assert_internal(idx + m <= MI_BFIELD_BITS);
    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
    const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
    bool already_xset;
    all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
    all_already_xset = all_already_xset && already_xset;
    // next field
    field++;
    idx = 0;
    n -= m;
  }
  *palready_xset = all_already_xset;
  return all_transition;
}

// Check if a sequence of `n` bits within a chunk are all set/cleared.
static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
  mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(n>0);
  bool all_xset = true;
  size_t idx = cidx % MI_BFIELD_BITS;
  size_t field = cidx / MI_BFIELD_BITS;
  while (n > 0) {
    size_t m = MI_BFIELD_BITS - idx;   // m is the bits to xset in this field
    if (m > n) { m = n; }
    mi_assert_internal(idx + m <= MI_BFIELD_BITS);
    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
    const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
    all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
    // next field
    field++;
    idx = 0;
    n -= m;
  }
  return all_xset;
}

// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving all bit fields as is.
static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
  mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(n>0);
  if (n==0) return true;
  size_t start_idx = cidx % MI_BFIELD_BITS;
  size_t start_field = cidx / MI_BFIELD_BITS;
  size_t end_field = MI_BITMAP_CHUNK_FIELDS;
  size_t mask_mid = 0;
  size_t mask_end = 0;

  // first field
  size_t field = start_field;
  size_t m = MI_BFIELD_BITS - start_idx;   // m is the bits to xset in this field
  if (m > n) { m = n; }
  mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
  mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
  const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
  if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;

  // done?
  n -= m;
  if (n==0) return true;

  // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields

  // mid fields
  while (n >= MI_BFIELD_BITS) {
    field++;
    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
    mask_mid = ~MI_ZU(0);
    if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
    n -= MI_BFIELD_BITS;
  }

  // last field
  if (n > 0) {
    mi_assert_internal(n < MI_BFIELD_BITS);
    field++;
    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
    end_field = field;
    mask_end = (MI_ZU(1)<<n)-1;
    if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
  }

  return true;

restore:
  // field is on the field that failed to set atomically; we need to restore all previous fields
  mi_assert_internal(field > start_field);
  while( field > start_field) {
    field--;
    const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
    bool already_xset;
    mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
  }
  return false;
}
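An editor's note on the restore path (illustrative, not part of this change): it makes `mi_bitmap_chunk_try_xsetN` all-or-nothing across bfields. For example, with 64-bit bfields, claiming 96 bits at chunk offset 32 tries a 32-bit mask in bfield 0 and then a full mask in bfield 1; if bfield 1 fails, bfield 0 is cleared again before returning false.

  //   mi_bitmap_chunk_t chunk;  // assumed zero-initialized elsewhere
  //   bool ok = mi_bitmap_chunk_try_xsetN(MI_BIT_SET, &chunk, 32, 96);  // spans bfields 0 and 1, succeeds or leaves the chunk as is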
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
  mi_assert_internal(count > 0);
  if (count <= 2) {
    // we don't bother with crossover fields for small counts
    return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
  }

  // visit the fields
  size_t idx = start_field_idx;
  for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
    if (idx >= bitmap_fields) { idx = 0; } // wrap
    // first try to claim inside a field
    /*
    if (count <= MI_BITMAP_FIELD_BITS) {
      if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
        return true;
      }
    }
    */
    // if that fails, then try to claim across fields
    if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
      return true;
    }
  }
  return false;
}

// find least 1-bit in a chunk and try to unset it atomically
// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  while(true) {
    const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
    if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
    const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
    const uint32_t mask = ~_mm256_movemask_epi8(vcmp);  // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
    mi_assert_internal(mask != 0);
    const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
    mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
    size_t cidx;
    if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) {  // find the bit that is set
      if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) {  // unset atomically
        *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
        mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
        return true;
      }
    }
    // try again
  }
  #else
  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    size_t idx;
    if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
      if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) {  // try unset atomically
        *pidx = (i*MI_BFIELD_BITS + idx);
        mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
        return true;
      }
    }
  }
  return false;
  #endif
}
// find least byte in a chunk with all bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  while(true) {
    const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
    const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
    const uint32_t mask = _mm256_movemask_epi8(vcmp);  // mask of most significant bit of each byte
    if (mask == 0) return false;
    const size_t i = _tzcnt_u32(mask);
    mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
    const size_t chunk_idx = i / MI_BFIELD_SIZE;
    const size_t byte_idx = i % MI_BFIELD_SIZE;
    if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) {  // try to unset atomically
      *pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
      mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
      return true;
    }
    // try again
  }
  #else
  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    const mi_bfield_t x = chunk->bfields[i];
    // has_set8 has low bit in each byte set if the byte in x == 0xFF
    const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) &  // high bit set if byte in x is 0xFF or < 0x7F
                                  (x  & MI_BFIELD_HI_BIT8))   // high bit set if byte in x is >= 0x80
                                 >> 7;                        // shift high bit to low bit
    size_t idx;
    if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
      mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
      mi_assert_internal((idx%8)==0);
      const size_t byte_idx = idx/8;
      if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) {  // unset the byte atomically
        *pidx = (i*MI_BFIELD_BITS) + idx;
        mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
        return true;
      }
      // else continue
    }
  }
  return false;
  #endif
}
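The `has_set8` expression is a SWAR (SIMD-within-a-register) test; an illustrative single-byte walk-through (editor's addition, not part of this change; it ignores the inter-byte borrow, which only propagates upward past a byte that is itself 0xFF and therefore never hides the least such byte):

  //   b == 0xFF:  ~b == 0x00; 0x00 - 0x01 wraps to 0xFF (high bit set),
  //               and b & 0x80 is set                      -> byte detected
  //   b == 0x80:  ~b == 0x7F; 0x7F - 0x01 == 0x7E (high bit clear) -> rejected
  //   b == 0x7F:  b & 0x80 == 0                            -> rejected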
// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
// todo: try avx2 and neon version
// todo: allow spanning across bfield boundaries?
static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
  if (n == 0 || n > MI_BFIELD_BITS) return false;  // TODO: allow larger?
  const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    mi_bfield_t b = chunk->bfields[i];
    size_t bshift = 0;
    size_t idx;
    while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
      b >>= idx;
      bshift += idx;
      if (bshift + n >= MI_BFIELD_BITS) break;

      if ((b&mask) == mask) { // found a match
        mi_assert_internal( ((mask << bshift) >> bshift) == mask );
        if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
          *pidx = (i*MI_BFIELD_BITS) + bshift;
          mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
          mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
          return true;
        }
        else {
          // if failed to atomically commit, try again from this position
          b = (chunk->bfields[i] >> bshift);
        }
      }
      else {
        // advance
        const size_t ones = mi_bfield_ctz(~b);  // skip all ones (since it didn't fit the mask)
        mi_assert_internal(ones>0);
        bshift += ones;
        b >>= ones;
      }
    }
  }
  return false;
}
// are all bits in a bitmap chunk set?
static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
  return _mm256_test_all_ones(vec);
  #else
  // written like this for vectorization
  mi_bfield_t x = chunk->bfields[0];
  for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    x = x & chunk->bfields[i];
  }
  return (~x == 0);
  #endif
}

// are all bits in a bitmap chunk clear?
static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
  return _mm256_testz_si256( vec, vec );
  #else
  // written like this for vectorization
  mi_bfield_t x = chunk->bfields[0];
  for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    x = x | chunk->bfields[i];
  }
  return (x == 0);
  #endif
}
/* --------------------------------------------------------------------------------
 bitmap
-------------------------------------------------------------------------------- */

// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
  if (!already_zero) {
    _mi_memzero_aligned(bitmap, sizeof(*bitmap));
  }
}
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);

  // first chunk
  size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  size_t m = MI_BITMAP_CHUNK_BITS - cidx;
  if (m > n) { m = n; }
  bool already_xset;
  mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);

  // n can be large so use memset for efficiency for all in-between chunks
  chunk_idx++;
  n -= m;
  const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
  if (mid_chunks > 0) {
    _mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), mid_chunks * (MI_BITMAP_CHUNK_BITS/8));  // cover all mid chunks
    chunk_idx += mid_chunks;
    n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
  }

  // last chunk
  if (n > 0) {
    mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
    mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
    mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
  }
}
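An illustrative sketch of single-threaded setup with these two functions (editor's addition, not part of this change; assumes the 0..999 range is within MI_BITMAP_MAX_BITS):

  //   mi_bitmap_t bm;
  //   mi_bitmap_init(&bm, false);                        // zero the whole bitmap
  //   mi_bitmap_unsafe_xsetN(MI_BIT_SET, &bm, 0, 1000);  // not atomic: init/local use only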
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
  mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
}

// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
  mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
  mi_assert_internal(idx%8 == 0);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
  return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
}

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
  if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }

  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
}

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  bool local_already_xset;
  if (already_xset==NULL) { already_xset = &local_already_xset; }
  // if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
  // if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }

  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
}

// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
}
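An illustrative sketch of the claim/verify/release cycle over this interface (editor's addition, not part of this change; `bm` is assumed to be an initialized `mi_bitmap_t` and the 4-bit range does not cross a chunk boundary):

  //   if (mi_bitmap_try_xsetN(MI_BIT_SET, &bm, 128, 4)) {            // claim bits 128..131 atomically
  //     mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &bm, 128, 4));
  //     mi_bitmap_xsetN(MI_BIT_CLEAR, &bm, 128, 4, NULL);            // release them again
  //   }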
#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
  { size_t _set_idx; \
    size_t _start = start % MI_BFIELD_BITS; \
    mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
    while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
      decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;

#define mi_bitmap_forall_set_chunks_end() \
      _start += _set_idx+1;    /* so chunk_idx stays valid */ \
      _any_set >>= _set_idx;   /* skip scanned bits (and avoid UB with (idx+1)) */ \
      _any_set >>= 1; \
    } \
  }

// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
// (to reduce thread contention).
bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
      return true;
    }
    else {
      // we may find that all are unset only on a second iteration but that is ok as
      // _any_set is a conservative approximation.
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}
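An illustrative sketch of popping any set bit (editor's addition, not part of this change; `bm` and `my_thread_id` are assumed to be provided by the caller, the latter just to spread the search start and reduce contention):

  //   size_t idx;
  //   if (mi_bitmap_try_find_and_clear(&bm, &idx, my_thread_id)) {
  //     // bit `idx` was set and is now exclusively owned by this thread
  //   }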
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
      mi_assert_internal((*pidx % 8) == 0);
      return true;
    }
    else {
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
  // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
  // TODO: allow spanning across chunk boundaries
  if (n == 0 || n > MI_BFIELD_BITS) return false;
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
      return true;
    }
    else {
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}
154	src/bitmap.h
@@ -6,105 +6,87 @@ terms of the MIT license. A copy of the license can be found in the file
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H

/* --------------------------------------------------------------------------------
  Definitions
-------------------------------------------------------------------------------- */

typedef size_t mi_bfield_t;

#define MI_BFIELD_BITS_SHIFT      (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS            (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE            (MI_BFIELD_BITS/8)
#define MI_BFIELD_BITS_MOD_MASK   (MI_BFIELD_BITS - 1)
#define MI_BFIELD_LO_BIT8         ((~((mi_bfield_t)0))/0xFF)  // 0x01010101 ..
#define MI_BFIELD_HI_BIT8         (MI_BFIELD_LO_BIT8 << 7)    // 0x80808080 ..

#define MI_BITMAP_CHUNK_FIELDS        (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)

typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
  _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;

typedef mi_decl_align(32) struct mi_bitmap_s {
  mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
  _Atomic(mi_bfield_t) any_set;
} mi_bitmap_t;
|
||||
|
||||
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
|
||||
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
|
||||
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);
|
||||
#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 0 atomically
|
||||
// Returns `true` if all `count` bits were 1 previously.
|
||||
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
/* --------------------------------------------------------------------------------
|
||||
Bitmap
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
|
||||
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
|
||||
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
|
||||
typedef bool mi_bit_t;
|
||||
#define MI_BIT_SET (true)
|
||||
#define MI_BIT_CLEAR (false)
|
||||
|
||||
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
|
||||
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);
|
||||
|
||||
#endif
|
||||
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
|
||||
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
|
||||
|
||||
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
|
||||
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
|
||||
// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
|
||||
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);
|
||||
|
||||
// Is a sequence of n bits already all set/cleared?
|
||||
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
|
||||
|
||||
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
|
||||
|
||||
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
|
||||
|
||||
// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
|
||||
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
|
||||
|
||||
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
|
||||
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
|
||||
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
|
||||
// (to reduce thread contention).
|
||||
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);
|
||||
|
||||
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
|
||||
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
|
||||
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx );
|
||||
|
||||
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
|
||||
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
|
||||
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx );
|
||||
|
||||
#endif // MI_XBITMAP_H
|
||||
|
|
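With these definitions a global bit index splits into a chunk index, a field index within the chunk, and a bit index within the field. A worked example of the arithmetic, assuming a 64-bit build (`MI_BFIELD_BITS == 64`, `MI_BITMAP_CHUNK_BITS == 256`, so `MI_BITMAP_CHUNK_FIELDS == 4`):

  #include <stddef.h>

  #define BFIELD_BITS   64                          // bits per size_t field
  #define CHUNK_FIELDS  4                           // fields per chunk
  #define CHUNK_BITS    (BFIELD_BITS*CHUNK_FIELDS)  // 256 bits per chunk

  // Decompose a global bit index the way the chunked bitmap addresses it.
  static void decompose(size_t idx, size_t* chunk, size_t* field, size_t* bit) {
    *chunk = idx / CHUNK_BITS;                   // which chunk
    *field = (idx % CHUNK_BITS) / BFIELD_BITS;   // which field in the chunk
    *bit   = idx % BFIELD_BITS;                  // which bit in the field
  }
  // e.g. idx 300 -> chunk 1, field 0, bit 44   (300 == 1*256 + 0*64 + 44)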
104  src/free.c
@@ -24,7 +24,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
// ------------------------------------------------------

// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_block_t* block);

// regular free of a (thread local) block pointer
// fast path written carefully to prevent spilling on the stack

@@ -57,7 +57,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
  mi_assert_internal(page!=NULL && p!=NULL);

  size_t diff = (uint8_t*)p - page->page_start;
  size_t diff = (uint8_t*)p - mi_page_start(page);
  size_t adjust;
  if mi_likely(page->block_size_shift != 0) {
    adjust = diff & (((size_t)1 << page->block_size_shift) - 1);

@@ -82,72 +82,55 @@ static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, vo
#endif

// free a local pointer (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
  MI_UNUSED(segment);
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_attr_noexcept {
  mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
  mi_block_check_unguard(page, block, p);
  mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
}

// free a pointer owned by another thread (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept {
  mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
  mi_block_check_unguard(page, block, p);
  mi_free_block_mt(page, segment, block);
  mi_free_block_mt(page, block);
}

// generic free (for runtime integration)
void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
  if (is_local) mi_free_generic_local(page,segment,p);
  else mi_free_generic_mt(page,segment,p);
void mi_decl_noinline _mi_free_generic(mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
  if (is_local) mi_free_generic_local(page,p);
  else mi_free_generic_mt(page,p);
}

// Get the segment data belonging to a pointer
// This is just a single `and` in release mode but does further checks in debug mode
// (and secure mode) to see if this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
{
  MI_UNUSED(msg);

  #if (MI_DEBUG>0)
  MI_UNUSED_RELEASE(msg);
  #if MI_DEBUG
  if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
    _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
    return NULL;
  }
  #endif

  mi_segment_t* const segment = _mi_ptr_segment(p);
  if mi_unlikely(segment==NULL) return segment;

  #if (MI_DEBUG>0)
  if mi_unlikely(!mi_is_in_heap_region(p)) {
    _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
      "(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
    if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
      _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
    }
  mi_page_t* const page = _mi_ptr_page(p);
  #if MI_DEBUG
  if (page == MI_PAGE_PTR_INVALID) {
    _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
  }
  #endif
  #if (MI_DEBUG>0 || MI_SECURE>=4)
  if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
    _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
    return NULL;
  }
  #endif

  return segment;
  return page;
}

// Free a block
// Fast path written carefully to prevent register spilling on the stack
void mi_free(void* p) mi_attr_noexcept
{
  mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
  if mi_unlikely(segment==NULL) return;
  mi_page_t* const page = mi_checked_ptr_page(p,"mi_free");
  if mi_unlikely(page==NULL) return;

  const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
  mi_page_t* const page = _mi_segment_page_of(segment, p);

  const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
  if mi_likely(is_local) {                        // thread-local free?
    if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
      // thread-local, aligned, and not a full page

@@ -156,12 +139,12 @@ void mi_free(void* p) mi_attr_noexcept
    }
    else {
      // page is full or contains (inner) aligned blocks; use generic path
      mi_free_generic_local(page, segment, p);
      mi_free_generic_local(page, p);
    }
  }
  else {
    // not thread-local; use generic path
    mi_free_generic_mt(page, segment, p);
    mi_free_generic_mt(page, p);
  }
}
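The rewritten `mi_free` shows why the page map matters for the fast path: one lookup yields the page, one compare against `mi_page_thread_id` decides local versus cross-thread, and the combined `full_aligned` flag folds the two slow conditions into a single branch. A simplified sketch of that dispatch shape (the `demo_` types and helpers are stand-ins, not mimalloc's):

  #include <stdint.h>

  // Stand-in page with just the two fields the dispatch needs.
  typedef struct demo_page_s {
    uintptr_t thread_id;     // owning thread id (0 when abandoned)
    uint8_t   full_aligned;  // non-zero if the page is full OR has aligned blocks
  } demo_page_t;

  extern uintptr_t demo_thread_id(void);
  extern void demo_free_fast(demo_page_t* page, void* p);
  extern void demo_free_generic_local(demo_page_t* page, void* p);
  extern void demo_free_generic_mt(demo_page_t* page, void* p);

  void demo_free(demo_page_t* page, void* p) {
    if (demo_thread_id() == page->thread_id) {     // thread-local free?
      if (page->full_aligned == 0) { demo_free_fast(page, p); }
      else { demo_free_generic_local(page, p); }   // full page or aligned block
    }
    else {
      demo_free_generic_mt(page, p);               // cross-thread free
    }
  }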
@@ -169,10 +152,8 @@ void mi_free(void* p) mi_attr_noexcept
bool _mi_free_delayed_block(mi_block_t* block) {
  // get segment and page
  mi_assert_internal(block!=NULL);
  const mi_segment_t* const segment = _mi_ptr_segment(block);
  mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
  mi_assert_internal(_mi_thread_id() == segment->thread_id);
  mi_page_t* const page = _mi_segment_page_of(segment, block);
  mi_page_t* const page = mi_checked_ptr_page(block,"_mi_free_delayed_block");
  mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));

  // Clear the no-delayed flag so delayed freeing is used again for this page.
  // This must be done before collecting the free lists on this page -- otherwise

@@ -242,20 +223,19 @@ static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block
}

// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
  // first see if the segment was abandoned and if we can reclaim it into our thread
  if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 &&
    #if MI_HUGE_PAGE_ABANDON
    segment->page_kind != MI_PAGE_HUGE &&
    #endif
    mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned?
  // first see if the page was abandoned and if we can reclaim it into our thread
  if (mi_page_is_abandoned(page) &&
      (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 ||
       mi_page_is_singleton(page)  // only one block, and we are free-ing it
      ) &&
      mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty)  // and we did not already exit this thread (without this check, a fresh heap will be initialized (issue #944))
  {
    // the segment is abandoned, try to reclaim it into our heap
    if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
      mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
      mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
    // the page is abandoned, try to reclaim it into our heap
    if (_mi_heap_try_reclaim(mi_heap_get_default(), page)) {  // TODO: avoid putting it in the full free queue
      mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
      // mi_assert_internal(mi_heap_get_default()->tld->subproc == page->subproc);
      mi_free(block); // recursively free as now it will be a local free in our heap
      return;
    }
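Spelled out, the reclaim guard above requires three things at once: the page is abandoned, reclaiming is either enabled by option or the page is a singleton whose only block is being freed, and the calling thread still has a live default heap (the issue #944 guard). As a standalone predicate (names are illustrative, not mimalloc's):

  #include <stdbool.h>

  extern bool page_is_abandoned(void);
  extern bool reclaim_on_free_enabled(void);  // mi_option_abandoned_reclaim_on_free
  extern bool page_is_singleton(void);        // only one block, and we free it
  extern bool default_heap_is_live(void);     // thread did not already exit

  static bool should_try_reclaim(void) {
    return page_is_abandoned()
        && (reclaim_on_free_enabled() || page_is_singleton())
        && default_heap_is_live();
  }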
@@ -272,17 +252,12 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
  // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
  _mi_padding_shrink(page, block, sizeof(mi_block_t));

  if (segment->page_kind == MI_PAGE_HUGE) {
    #if MI_HUGE_PAGE_ABANDON
    // huge page segments are always abandoned and can be freed immediately
    _mi_segment_huge_page_free(segment, page, block);
    return;
    #else
  if (mi_page_is_huge(page)) {
    mi_assert_internal(mi_page_is_singleton(page));
    // huge pages are special as they occupy the entire segment
    // as these are large we reset the memory occupied by the page so it is available to other threads
    // (as the owning thread needs to actually free the memory later).
    _mi_segment_huge_page_reset(segment, page, block);
    #endif
    _mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL);  // resets conservatively
  }
  else {
    #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN  // note: when tracking, cannot use mi_usable_size with multi-threading

@@ -316,9 +291,8 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p
}

static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
  const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
  if mi_unlikely(segment==NULL) return 0;
  const mi_page_t* const page = _mi_segment_page_of(segment, p);
  const mi_page_t* const page = mi_checked_ptr_page(p,msg);
  if mi_unlikely(page==NULL) return 0;
  if mi_likely(!mi_page_has_aligned(page)) {
    const mi_block_t* block = (const mi_block_t*)p;
    return mi_page_usable_size_of(page, block);

@@ -523,7 +497,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
  const size_t usize = mi_page_usable_size_of(page, block);
  mi_heap_stat_decrease(heap, malloc, usize);
  #endif
  if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
  if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
    mi_heap_stat_decrease(heap, normal, bsize);
    #if (MI_STAT > 1)
    mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
@@ -7,11 +7,8 @@ terms of the MIT license. A copy of the license can be found in the file

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"  // mi_prim_get_default_heap

#include <string.h>  // memset, memcpy

#if defined(_MSC_VER) && (_MSC_VER < 1920)
#pragma warning(disable:4204)  // non-constant aggregate initializer
#endif

@@ -258,7 +255,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
  mi_assert_internal(heap != NULL);
  mi_assert_internal(mi_heap_is_initialized(heap));
  // TODO: copy full empty heap instead?
  memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
  _mi_memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
  _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
  heap->thread_delayed_free = NULL;
  heap->page_count = 0;
55  src/os.c
@@ -59,6 +59,10 @@ size_t _mi_os_large_page_size(void) {
  return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}

size_t _mi_os_virtual_address_bits(void) {
  return mi_os_mem_config.virtual_address_bits;
}

bool _mi_os_use_large_page(size_t size, size_t alignment) {
  // if we have access, check the size and alignment requirements
  if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;

@@ -103,58 +107,10 @@ static void* mi_align_down_ptr(void* p, size_t alignment) {
  return (void*)_mi_align_down((uintptr_t)p, alignment);
}


/* -----------------------------------------------------------
  aligned hinting
-------------------------------------------------------------- */

// On systems with enough virtual address bits, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
// space (64TiB) we use this technique. (but see issue #939)
#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;

// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS will determine the address but on some OS's that may not be
// properly aligned which can be more costly as it needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372))

#define MI_HINT_BASE ((uintptr_t)2 << 40)   // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40)   // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX  ((uintptr_t)30 << 40)  // wrap after 30TiB (area after 32TiB is used for huge OS pages)

void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
  if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
  if (mi_os_mem_config.virtual_address_bits < 46) return NULL;  // < 64TiB virtual address space
  size = _mi_align_up(size, MI_SEGMENT_SIZE);
  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
  #if (MI_SECURE>0)
  size += MI_SEGMENT_SIZE;  // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
  #endif

  uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
  if (hint == 0 || hint > MI_HINT_MAX) {  // wrap or initialize
    uintptr_t init = MI_HINT_BASE;
    #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of aligned allocations unless in debug mode
    uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
    init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA);  // (randomly 20 bits)*4MiB == 0 to 4TiB
    #endif
    uintptr_t expected = hint + size;
    mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
    hint = mi_atomic_add_acq_rel(&aligned_base, size);  // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
  }
  if (hint%try_alignment != 0) return NULL;
  return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
  MI_UNUSED(try_alignment); MI_UNUSED(size);
  return NULL;
}
#endif


/* -----------------------------------------------------------

@@ -380,12 +336,10 @@ void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
----------------------------------------------------------- */

void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) {
  mi_assert(offset <= MI_SEGMENT_SIZE);
  mi_assert(offset <= size);
  mi_assert((alignment % _mi_os_page_size()) == 0);
  *memid = _mi_memid_none();
  if (stats == NULL) stats = &_mi_stats_main;
  if (offset > MI_SEGMENT_SIZE) return NULL;
  if (offset == 0) {
    // regular aligned allocation
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats);

@@ -605,7 +559,6 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
    #endif
    }
    end = start + size;
    mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
  } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));

  if (total_size != NULL) *total_size = size;
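The removed hint allocator is just an atomic bump pointer over a fixed virtual range with a racy wrap: overshoot is tolerated because a bad value only degrades to a NULL hint or an OS-chosen address. A compact C11 rendering of that wrap-and-retry pattern (example constants; the original additionally randomizes the restart point in secure mode):

  #include <stdatomic.h>
  #include <stddef.h>
  #include <stdint.h>

  #define HINT_BASE ((uintptr_t)2 << 40)    // 2 TiB
  #define HINT_MAX  ((uintptr_t)30 << 40)   // wrap after 30 TiB

  static _Atomic(uintptr_t) hint_base;

  static void* next_hint(size_t size) {
    uintptr_t hint = atomic_fetch_add(&hint_base, size);
    if (hint == 0 || hint > HINT_MAX) {           // first use, or wrapped
      uintptr_t expected = hint + size;           // only reset if unchanged
      atomic_compare_exchange_strong(&hint_base, &expected, HINT_BASE);
      hint = atomic_fetch_add(&hint_base, size);  // may still race: it is only a hint
    }
    return (void*)hint;
  }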
90  src/page-map.c  (new file)
@@ -0,0 +1,90 @@
/*----------------------------------------------------------------------------
Copyright (c) 2023-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"

mi_decl_cache_align signed char* _mi_page_map = NULL;
static bool   mi_page_map_all_committed = false;
static size_t mi_blocks_per_commit_bit  = 1;
static mi_memid_t  mi_page_map_memid;
static mi_bitmap_t mi_page_map_commit;

static bool mi_page_map_init(void) {
  size_t vbits = _mi_os_virtual_address_bits();
  if (vbits >= 48) vbits = 47;
  // 1 byte per block = 2 GiB for 128 TiB address space (48 bit = 256 TiB address space)
  //                    64 KiB for 4 GiB address space (on 32-bit)
  const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_BLOCK_SHIFT));

  const size_t min_commit_size = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
  mi_blocks_per_commit_bit = mi_block_count_of_size(min_commit_size);

  mi_page_map_all_committed = _mi_os_has_overcommit();  // commit on-access on Linux systems
  _mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 0, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
  if (_mi_page_map==NULL) {
    _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
    return false;
  }
  if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
    _mi_warning_message("the page map was committed on-demand but not zero initialized!\n");
    _mi_memzero_aligned(_mi_page_map, page_map_size);
  }
  return true;
}

static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* block_count) {
  size_t page_size;
  *page_start = mi_page_area(page, &page_size);
  if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE; }  // furthest interior pointer
  *block_count = mi_block_count_of_size(page_size);
  return ((uintptr_t)*page_start >> MI_ARENA_BLOCK_SHIFT);
}


void _mi_page_map_register(mi_page_t* page) {
  if mi_unlikely(_mi_page_map == NULL) {
    if (!mi_page_map_init()) return;
  }
  uint8_t* page_start;
  size_t   block_count;
  const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);

  // is the page map area that contains the page address committed?
  if (!mi_page_map_all_committed) {
    const size_t commit_bit_count = _mi_divide_up(block_count, mi_blocks_per_commit_bit);
    const size_t commit_bit_idx   = idx / mi_blocks_per_commit_bit;
    for (size_t i = 0; i < commit_bit_count; i++) {  // per bit to avoid crossing over bitmap chunks
      if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, commit_bit_idx + i, 1)) {
        // this may race, in which case we do multiple commits (which is ok)
        _mi_os_commit(page_start + (i*mi_blocks_per_commit_bit*MI_ARENA_BLOCK_SIZE), mi_blocks_per_commit_bit* MI_ARENA_BLOCK_SIZE, NULL, NULL);
        mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, commit_bit_idx + i, 1, NULL);
      }
    }
  }

  // set the offsets
  for (int i = 0; i < block_count; i++) {
    mi_assert_internal(i < 128);
    _mi_page_map[idx + i] = (int8_t)(-i-1);
  }
}


void _mi_page_map_unregister(mi_page_t* page) {
  mi_assert_internal(_mi_page_map != NULL);

  // get index and count
  uint8_t* page_start;
  size_t   block_count;
  const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);

  // unset the offsets
  _mi_memzero(_mi_page_map + idx, block_count);
}
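Given the `-(i+1)` offsets stored by `_mi_page_map_register`, recovering a page from an arbitrary interior pointer is a shift, one byte load, and an add; no segment alignment is needed anymore. A sketch of the corresponding lookup, derived from the registration code above (the real `_mi_ptr_page` lives in the internal headers and may differ in detail):

  #include <stddef.h>
  #include <stdint.h>

  #define BLOCK_SHIFT 16                 // example MI_ARENA_BLOCK_SHIFT
  extern signed char* page_map;          // one signed byte per arena block

  // map[b] == -(i+1) where i is block b's offset inside its page, so the
  // page's first block is b + map[b] + 1:
  static void* block_to_page_start(const void* p) {
    const size_t b = (size_t)((uintptr_t)p >> BLOCK_SHIFT);
    const size_t start = (size_t)((intptr_t)b + page_map[b] + 1);  // b - i
    return (void*)((uintptr_t)start << BLOCK_SHIFT);
  }
  // Unregistered entries are zeroed; the real lookup detects those and yields
  // MI_PAGE_PTR_INVALID instead of a bogus page (see mi_checked_ptr_page).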
67  src/page.c
@@ -59,7 +59,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) {

static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
  size_t psize;
  uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
  uint8_t* page_area = mi_page_area(page, &psize);
  mi_block_t* start = (mi_block_t*)page_area;
  mi_block_t* end   = (mi_block_t*)(page_area + psize);
  while(p != NULL) {

@@ -83,10 +83,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
  mi_assert_internal(page->capacity <= page->reserved);

  // const size_t bsize = mi_page_block_size(page);
  mi_segment_t* segment = _mi_page_segment(page);
  uint8_t* start = mi_page_start(page);
  mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
  mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
  //mi_assert_internal(start + page->capacity*page->block_size == page->top);

  mi_assert_internal(mi_page_list_is_valid(page,page->free));

@@ -122,15 +119,11 @@ bool _mi_page_is_valid(mi_page_t* page) {
  mi_assert_internal(page->keys[0] != 0);
  #endif
  if (mi_page_heap(page)!=NULL) {
    mi_segment_t* segment = _mi_page_segment(page);
    mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0);
    #if MI_HUGE_PAGE_ABANDON
    if (segment->page_kind != MI_PAGE_HUGE)
    #endif
    mi_assert_internal(!_mi_process_is_initialized || page->thread_id == mi_page_heap(page)->thread_id || page->thread_id==0);
    {
      mi_page_queue_t* pq = mi_page_queue_of(page);
      mi_assert_internal(mi_page_queue_contains(pq, page));
      mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
      mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_MAX_OBJ_SIZE || mi_page_is_in_full(page));
      mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
    }
  }

@@ -274,16 +267,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
  #if !MI_HUGE_PAGE_ABANDON
  mi_assert_internal(pq != NULL);
  mi_assert_internal(mi_heap_contains_queue(heap, pq));
  mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size);
  mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size);
  #endif
  mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os);
  mi_page_t* page = _mi_heap_page_alloc(heap, block_size, page_alignment);
  if (page == NULL) {
    // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
    return NULL;
  }
  #if MI_HUGE_PAGE_ABANDON
  mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
  #endif
  mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
  // a fresh page was found, initialize it
  const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size);  // see also: mi_segment_huge_page_alloc

@@ -384,7 +374,6 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
  mi_heap_t* pheap = mi_page_heap(page);

  // remove from our page list
  mi_segments_tld_t* segments_tld = &pheap->tld->segments;
  mi_page_queue_remove(pq, page);

  // page is no longer associated with our heap

@@ -399,8 +388,8 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
  #endif

  // and abandon it
  mi_assert_internal(mi_page_heap(page) == NULL);
  _mi_segment_page_abandon(page,segments_tld);
  mi_assert_internal(mi_page_is_abandoned(page));
  _mi_arena_page_abandon(page,&pheap->tld);
}

// force abandon a page

@@ -411,8 +400,7 @@ void _mi_page_force_abandon(mi_page_t* page) {

  // ensure this page is no longer in the heap delayed free list
  _mi_heap_delayed_free_all(heap);
  // We can still access the page meta-info even if it is freed as we ensure
  // in `mi_segment_force_abandon` that the segment is not freed (yet)
  // TODO: can we still access the page meta-info even if it is freed?
  if (page->capacity == 0) return;  // it may have been freed now

  // and now unlink it from the page queue and abandon (or free)

@@ -433,17 +421,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
  mi_assert_internal(mi_page_all_free(page));
  mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);

  mi_heap_t* pheap = mi_page_heap(page);

  // no more aligned blocks in here
  mi_page_set_has_aligned(page, false);

  // remove from the page list
  // (no need to do _mi_heap_delayed_free first as all blocks are already free)
  mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments;
  mi_page_queue_remove(pq, page);

  // and free it
  mi_page_set_heap(page,NULL);
  _mi_segment_page_free(page, force, segments_tld);
  _mi_arena_page_free(page, force, &pheap->tld);
}

#define MI_MAX_RETIRE_SIZE    MI_LARGE_OBJ_SIZE_MAX  // should be less than size for MI_BIN_HUGE

@@ -474,7 +463,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
  if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) {  // not full or huge queue?
    if (pq->last==page && pq->first==page) {  // the only page in the queue?
      mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
      page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
      page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
      mi_heap_t* heap = mi_page_heap(page);
      mi_assert_internal(pq >= heap->pages);
      const size_t index = pq - heap->pages;

@@ -639,7 +628,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)

  size_t page_size;
  //uint8_t* page_start =
  _mi_segment_page_start(_mi_page_segment(page), page, &page_size);
  mi_page_area(page, &page_size);
  mi_stat_counter_increase(tld->stats.pages_extended, 1);

  // calculate the extend count

@@ -676,15 +665,13 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
  mi_assert(page != NULL);
  mi_segment_t* segment = _mi_page_segment(page);
  mi_assert(segment != NULL);
  mi_assert_internal(block_size > 0);
  // set fields
  mi_page_set_heap(page, heap);
  page->block_size = block_size;
  size_t page_size;
  page->page_start = _mi_segment_page_start(segment, page, &page_size);
  mi_track_mem_noaccess(page->page_start,page_size);
  uint8_t* page_start = mi_page_area(page, &page_size);
  mi_track_mem_noaccess(page_start,page_size);
  mi_assert_internal(page_size / block_size < (1L<<16));
  page->reserved = (uint16_t)(page_size / block_size);
  mi_assert_internal(page->reserved > 0);

@@ -692,15 +679,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
  page->keys[0] = _mi_heap_random_next(heap);
  page->keys[1] = _mi_heap_random_next(heap);
  #endif
  page->free_is_zero = page->is_zero_init;
  page->free_is_zero = page->memid.initially_zero;
  #if MI_DEBUG>2
  if (page->is_zero_init) {
  if (page->memid.initially_zero) {
    mi_track_mem_defined(page->page_start, page_size);
    mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size));
    mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
  }
  #endif
  if (block_size > 0 && _mi_is_power_of_two(block_size)) {
    page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size));
    page->block_size_shift = (uint8_t)mi_ctz(block_size);
  }
  else {
    page->block_size_shift = 0;

@@ -734,13 +721,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
// search for a best next page to use for at most N pages (often cut short if immediate blocks are available)
#define MI_MAX_CANDIDATE_SEARCH  (8)

// is the page not yet used up to its reserved space?
static bool mi_page_is_expandable(const mi_page_t* page) {
  mi_assert_internal(page != NULL);
  mi_assert_internal(page->capacity <= page->reserved);
  return (page->capacity < page->reserved);
}


// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)

@@ -907,7 +887,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
  #if MI_HUGE_PAGE_ABANDON
  mi_page_queue_t* pq = NULL;
  #else
  mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1);  // always in the huge queue regardless of the block size
  mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_MAX_OBJ_SIZE+1);  // always in the huge queue regardless of the block size
  mi_assert_internal(mi_page_queue_is_huge(pq));
  #endif
  mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);

@@ -915,10 +895,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
  mi_assert_internal(mi_page_block_size(page) >= size);
  mi_assert_internal(mi_page_immediate_available(page));
  mi_assert_internal(mi_page_is_huge(page));
  mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
  mi_assert_internal(_mi_page_segment(page)->used==1);
  mi_assert_internal(mi_page_is_singleton(page));
  #if MI_HUGE_PAGE_ABANDON
  mi_assert_internal(_mi_page_segment(page)->thread_id==0);  // abandoned, not in the huge queue
  mi_assert_internal(mi_page_is_abandoned(page));
  mi_page_set_heap(page, NULL);
  #endif
  mi_heap_stat_increase(heap, huge, mi_page_block_size(page));

@@ -933,7 +912,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
  // huge allocation?
  const size_t req_size = size - MI_PADDING_SIZE;  // correct for padding_size in case of an overflow on `size`
  if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
  if mi_unlikely(req_size > (MI_LARGE_MAX_OBJ_SIZE - MI_PADDING_SIZE) || huge_alignment > 0) {
    if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
      _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
      return NULL;
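`page->block_size_shift`, set in `mi_page_init` above, caches `log2(block_size)` for power-of-two sizes so that `_mi_page_ptr_unalign` (in the free.c hunks earlier) can round an interior pointer down to its block with a mask instead of a division. A small demonstration of both paths, assuming only standard C plus GCC/Clang's `__builtin_ctzl`:

  #include <stddef.h>
  #include <stdint.h>

  // Cache the shift only when the block size is a power of two.
  static uint8_t block_size_shift(size_t block_size) {
    if (block_size > 0 && (block_size & (block_size - 1)) == 0) {
      return (uint8_t)__builtin_ctzl(block_size);
    }
    return 0;
  }

  // Distance from an interior offset back to its block start: a mask when a
  // shift is cached, a modulo otherwise.
  static size_t offset_in_block(size_t diff, size_t block_size, uint8_t shift) {
    if (shift != 0) { return diff & (((size_t)1 << shift) - 1); }
    return diff % block_size;
  }
  // e.g. block_size 64 (shift 6): diff 200 -> 200 & 63 == 8, same as 200 % 64.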
@@ -20,7 +20,7 @@ terms of the MIT license. A copy of the license can be found in the file
// containing the whole library. If it is linked first
// it will override all the standard library allocation
// functions (on Unix's).
#include "alloc.c"  // includes alloc-override.c
#include "alloc.c"  // includes alloc-override.c and free.c
#include "alloc-aligned.c"
#include "alloc-posix.c"
#include "arena.c"

@@ -31,6 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "options.c"
#include "os.c"
#include "page.c"  // includes page-queue.c
#include "page-map.c"
#include "random.c"
#include "segment.c"
#include "segment-map.c"
599  src/xbitmap.c
@ -1,599 +0,0 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Concurrent bitmap that can set/reset sequences of bits atomically
|
||||
---------------------------------------------------------------------------- */
|
||||
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/bits.h"
|
||||
#include "xbitmap.h"
|
||||
|
||||
/* --------------------------------------------------------------------------------
|
||||
bfields
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
static inline size_t mi_bfield_ctz(mi_bfield_t x) {
|
||||
return mi_ctz(x);
|
||||
}
|
||||
|
||||
static inline size_t mi_bfield_clz(mi_bfield_t x) {
|
||||
return mi_clz(x);
|
||||
}
|
||||
|
||||
// find the least significant bit that is set (i.e. count trailing zero's)
|
||||
// return false if `x==0` (with `*idx` undefined) and true otherwise,
|
||||
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
|
||||
static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
|
||||
return mi_bsf(x,idx);
|
||||
}
|
||||
|
||||
static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
|
||||
return mi_rotr(x,r);
|
||||
}
|
||||
|
||||
// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
|
||||
static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
|
||||
mi_assert_internal(idx < MI_BFIELD_BITS);
|
||||
const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
|
||||
if (set) {
|
||||
const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
|
||||
return ((old&mask) == 0);
|
||||
}
|
||||
else {
|
||||
mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
|
||||
return ((old&mask) == mask);
|
||||
}
|
||||
}
|
||||
|
||||
// Set/clear a mask set of bits atomically, and return true of the mask bits transitioned from all 0's to 1's (or all 1's to 0's)
|
||||
// `already_xset` is true if all bits for the mask were already set/cleared.
|
||||
static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
|
||||
mi_assert_internal(mask != 0);
|
||||
if (set) {
|
||||
mi_bfield_t old = *b;
|
||||
while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits until success
|
||||
*already_xset = ((old&mask) == mask);
|
||||
return ((old&mask) == 0);
|
||||
}
|
||||
else { // clear
|
||||
mi_bfield_t old = *b;
|
||||
while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success
|
||||
*already_xset = ((old&mask) == 0);
|
||||
return ((old&mask) == mask);
|
||||
}
|
||||
}
|
||||
|
||||
// Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
|
||||
static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
|
||||
mi_assert_internal(idx < MI_BFIELD_BITS);
|
||||
// for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first
|
||||
return mi_bfield_atomic_xset(set, b, idx);
|
||||
}
|
||||
|
||||
|
||||
// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
|
||||
// and false otherwise (leaving the bit field as is).
|
||||
static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
|
||||
mi_assert_internal(mask != 0);
|
||||
if (set) {
|
||||
mi_bfield_t old = *b;
|
||||
do {
|
||||
if ((old&mask) != 0) return false; // the mask bits are no longer 0
|
||||
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits
|
||||
return true;
|
||||
}
|
||||
else { // clear
|
||||
mi_bfield_t old = *b;
|
||||
do {
|
||||
if ((old&mask) != mask) return false; // the mask bits are no longer set
|
||||
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if all bits corresponding to a mask are set/cleared.
|
||||
static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
|
||||
mi_assert_internal(mask != 0);
|
||||
if (set) {
|
||||
return ((*b & mask) == mask);
|
||||
}
|
||||
else {
|
||||
return ((*b & mask) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
|
||||
// and false otherwise (leaving the bit field as is).
|
||||
static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)* b, size_t byte_idx ) {
|
||||
mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
|
||||
const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
|
||||
return mi_bfield_atomic_try_xset_mask(set,b,mask);
|
||||
}
|
||||
|
||||
|
||||
/* --------------------------------------------------------------------------------
|
||||
bitmap chunks
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
|
||||
mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
|
||||
const size_t i = cidx / MI_BFIELD_BITS;
|
||||
const size_t idx = cidx % MI_BFIELD_BITS;
|
||||
return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
|
||||
}
|
||||
|
||||
static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
|
||||
mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
|
||||
const size_t i = byte_idx / MI_BFIELD_SIZE;
|
||||
const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
|
||||
return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
|
||||
}
|
||||
|
||||
// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
|
||||
static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
|
||||
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(n>0);
|
||||
bool all_transition = true;
|
||||
bool all_already_xset = true;
|
||||
size_t idx = cidx % MI_BFIELD_BITS;
|
||||
size_t field = cidx / MI_BFIELD_BITS;
|
||||
while (n > 0) {
|
||||
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
|
||||
if (m > n) { m = n; }
|
||||
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
|
||||
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
|
||||
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
|
||||
bool already_xset;
|
||||
all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
|
||||
all_already_xset = all_already_xset && already_xset;
|
||||
// next field
|
||||
field++;
|
||||
idx = 0;
|
||||
n -= m;
|
||||
}
|
||||
*palready_xset = all_already_xset;
|
||||
return all_transition;
|
||||
}
|
||||
|
||||
// Check if a sequence of `n` bits within a chunk are all set/cleared.
|
||||
static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
|
||||
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(n>0);
|
||||
bool all_xset = true;
|
||||
size_t idx = cidx % MI_BFIELD_BITS;
|
||||
size_t field = cidx / MI_BFIELD_BITS;
|
||||
while (n > 0) {
|
||||
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
|
||||
if (m > n) { m = n; }
|
||||
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
|
||||
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
|
||||
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
|
||||
all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
|
||||
// next field
|
||||
field++;
|
||||
idx = 0;
|
||||
n -= m;
|
||||
}
|
||||
return all_xset;
|
||||
}
|
||||
|
||||
// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
|
||||
// and false otherwise leaving all bit fields as is.
|
||||
static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
|
||||
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(n>0);
|
||||
if (n==0) return true;
|
||||
size_t start_idx = cidx % MI_BFIELD_BITS;
|
||||
size_t start_field = cidx / MI_BFIELD_BITS;
|
||||
size_t end_field = MI_BITMAP_CHUNK_FIELDS;
|
||||
size_t mask_mid = 0;
|
||||
size_t mask_end = 0;
|
||||
|
||||
// first field
|
||||
size_t field = start_field;
|
||||
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
|
||||
if (m > n) { m = n; }
|
||||
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
|
||||
mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
|
||||
const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
|
||||
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;
|
||||
|
||||
// done?
|
||||
n -= m;
|
||||
if (n==0) return true;
|
||||
|
||||
// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
|
||||
|
||||
// mid fields
|
||||
while (n >= MI_BFIELD_BITS) {
|
||||
field++;
|
||||
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
|
||||
mask_mid = ~MI_ZU(0);
|
||||
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
|
||||
n -= MI_BFIELD_BITS;
|
||||
}
|
||||
|
||||
// last field
|
||||
if (n > 0) {
|
||||
mi_assert_internal(n < MI_BFIELD_BITS);
|
||||
field++;
|
||||
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
|
||||
end_field = field;
|
||||
mask_end = (MI_ZU(1)<<n)-1;
|
||||
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
restore:
|
||||
// field is on the field that failed to set atomically; we need to restore all previous fields
|
||||
mi_assert_internal(field > start_field);
|
||||
while( field > start_field) {
|
||||
field--;
|
||||
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
|
||||
bool already_xset;
|
||||
mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// find least 1-bit in a chunk and try unset it atomically
|
||||
// set `*pidx` to thi bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
|
||||
// todo: try neon version
|
||||
static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
|
||||
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
||||
while(true) {
|
||||
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||
if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
|
||||
const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
|
||||
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
|
||||
mi_assert_internal(mask != 0);
|
||||
const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
|
||||
mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
|
||||
size_t cidx;
|
||||
if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
|
||||
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
|
||||
*pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
|
||||
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// try again
|
||||
}
|
||||
#else
|
||||
size_t idx;
|
||||
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
|
||||
size_t idx;
|
||||
if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
|
||||
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
|
||||
*pidx = (i*MI_BFIELD_BITS + idx);
|
||||
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
#endif
|
||||
}

// find least byte in a chunk with all bits set, and try to clear it atomically;
// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  while (true) {
    const __m256i vec  = _mm256_load_si256((const __m256i*)chunk->bfields);
    const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0));  // (byte == 0xFF ? -1 : 0)
    const uint32_t mask = _mm256_movemask_epi8(vcmp);                     // mask of most significant bit of each byte
    if (mask == 0) return false;
    const size_t i = _tzcnt_u32(mask);
    mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
    const size_t chunk_idx = i / MI_BFIELD_SIZE;
    const size_t byte_idx  = i % MI_BFIELD_SIZE;
    if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) {  // try to clear the byte atomically
      *pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
      mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
      return true;
    }
    // another thread cleared it concurrently; try again
  }
  #else
  for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    const mi_bfield_t x = chunk->bfields[i];
    // `has_set8` has the low bit of each byte set iff that byte of `x` is 0xFF
    const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) &  // high bit set if byte in x is 0xFF or < 0x7F
                                  (x  & MI_BFIELD_HI_BIT8))   // high bit set if byte in x is >= 0x80
                                 >> 7;                        // shift the high bit to the low bit
    size_t idx;
    if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) {  // find least 1-bit
      mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
      mi_assert_internal((idx%8)==0);
      const size_t byte_idx = idx/8;
      if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) {  // clear the byte atomically
        *pidx = (i*MI_BFIELD_BITS) + idx;
        mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
        return true;
      }
      // else continue with the next field
    }
  }
  return false;
  #endif
}
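
The `has_set8` expression in the portable path is the classic SWAR "haszero" trick applied to `~x`: a byte of `x` is 0xFF exactly when the corresponding byte of `~x` is zero. A small standalone check of that identity with the constants spelled out for a 64-bit field (illustrative only, not mimalloc code):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uint64_t lo = 0x0101010101010101u;  // low bit of each byte
  const uint64_t hi = 0x8080808080808080u;  // high bit of each byte
  const uint64_t x  = 0x12FF3400FF7F80FFu;  // bytes 0, 3, and 6 are 0xFF (byte 0 = least significant)
  const uint64_t has_set8 = (((~x - lo) & (x & hi)) >> 7);  // low bit of each byte set iff that byte of x is 0xFF
  for (int i = 0; i < 8; i++) {
    if ((has_set8 >> (8*i)) & 1) printf("byte %d is 0xFF\n", i);  // prints bytes 0, 3, and 6
  }
  return 0;
}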

// find a sequence of `n` consecutive set bits in a chunk, and try to clear them all atomically;
// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
// todo: try avx2 and neon version
// todo: allow spanning across bfield boundaries?
static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
  if (n == 0 || n > MI_BFIELD_BITS) return false;  // TODO: allow larger?
  const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
  for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    mi_bfield_t b = chunk->bfields[i];
    size_t bshift = 0;
    size_t idx;
    while (mi_bfield_find_least_bit(b, &idx)) {  // find least 1-bit
      b >>= idx;
      bshift += idx;
      if (bshift + n > MI_BFIELD_BITS) break;    // no room left in this field for a run of `n` bits

      if ((b&mask) == mask) {  // found a match
        mi_assert_internal( ((mask << bshift) >> bshift) == mask );
        if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
          *pidx = (i*MI_BFIELD_BITS) + bshift;
          mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
          mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
          return true;
        }
        else {
          // if we failed to atomically commit, try again from this position
          b = (chunk->bfields[i] >> bshift);
        }
      }
      else {
        // advance: skip the whole run of ones since it was too short to fit the mask
        const size_t ones = mi_bfield_ctz(~b);
        mi_assert_internal(ones>0);
        bshift += ones;
        b >>= ones;
      }
    }
  }
  return false;
}
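
To see the scan in isolation: it repeatedly jumps to the next 1-bit, tests whether a window of `n` ones fits there, and otherwise skips the entire (too short) run of ones. A standalone, non-atomic version over a single 64-bit word, assuming the GCC/Clang `__builtin_ctzll` (illustrative only):

#include <stdbool.h>
#include <stdint.h>

// Find `n` consecutive 1-bits in `x` (1 <= n <= 64); on success set `*pidx` to
// the index of the lowest bit of the run.
static bool find_ones_run(uint64_t x, unsigned n, unsigned* pidx) {
  const uint64_t mask = (n == 64 ? ~(uint64_t)0 : (((uint64_t)1 << n) - 1));
  unsigned shift = 0;
  while (x != 0) {
    const unsigned idx = (unsigned)__builtin_ctzll(x);    // skip zeros up to the next 1-bit
    x >>= idx; shift += idx;
    if (shift + n > 64) return false;                     // no room left for a run of n
    if ((x & mask) == mask) { *pidx = shift; return true; }
    const unsigned ones = (unsigned)__builtin_ctzll(~x);  // run too short: skip it entirely
    x >>= ones; shift += ones;
  }
  return false;
}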

// are all bits in a bitmap chunk set?
static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
  return _mm256_test_all_ones(vec);
  #else
  // written like this for vectorization
  mi_bfield_t x = chunk->bfields[0];
  for (int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    x = x & chunk->bfields[i];
  }
  return (~x == 0);
  #endif
}

// are all bits in a bitmap chunk clear?
static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
  return _mm256_testz_si256(vec, vec);
  #else
  // written like this for vectorization
  mi_bfield_t x = chunk->bfields[0];
  for (int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    x = x | chunk->bfields[i];
  }
  return (x == 0);
  #endif
}

/* --------------------------------------------------------------------------------
  bitmap
-------------------------------------------------------------------------------- */

// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
  if (!already_zero) {
    _mi_memzero_aligned(bitmap, sizeof(*bitmap));
  }
}

// Set/clear a sequence of `n` bits in the bitmap (which can cross chunks).
// Not atomic, so only use it while the bitmap is still local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);

  // first chunk
  size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  size_t m = MI_BITMAP_CHUNK_BITS - cidx;
  if (m > n) { m = n; }
  bool already_xset;
  mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);

  // `n` can be large so use memset for efficiency for all in-between chunks
  chunk_idx++;
  n -= m;
  const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
  if (mid_chunks > 0) {
    _mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), mid_chunks * (MI_BITMAP_CHUNK_BITS/8));
    chunk_idx += mid_chunks;
    n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
  }

  // last chunk
  if (n > 0) {
    mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
    mi_assert_internal(chunk_idx < MI_BFIELD_BITS);
    mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
  }
}
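
For concreteness, a worked instance of the first/middle/last split above, assuming 256-bit chunks and the hypothetical arguments `idx = 300`, `n = 700` (illustrative only):

#include <assert.h>
#include <stddef.h>

int main(void) {
  const size_t CHUNK = 256;
  size_t idx = 300, n = 700;
  size_t chunk_idx = idx / CHUNK;        // 1
  const size_t cidx = idx % CHUNK;       // 44: start offset inside the first chunk
  size_t m = CHUNK - cidx;               // 212 bits handled in the first chunk
  if (m > n) m = n;
  chunk_idx++; n -= m;                   // 488 bits left
  const size_t mid_chunks = n / CHUNK;   // 1 full chunk is memset wholesale
  chunk_idx += mid_chunks; n -= mid_chunks * CHUNK;
  assert(chunk_idx == 3 && n == 232);    // 232 bits remain for the last chunk
  return 0;
}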

// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
// and `false` otherwise, leaving the bitmap as is.
bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
  mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  return mi_bitmap_chunk_try_xset(set, &bitmap->chunks[chunk_idx], cidx);
}

// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0),
// and `false` otherwise, leaving the bitmap as is.
bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
  mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
  mi_assert_internal(idx%8 == 0);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
  return mi_bitmap_chunk_try_xset8(set, &bitmap->chunks[chunk_idx], byte_idx);
}

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's),
// and `false` otherwise, leaving the bitmap as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
  if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }  // note: requires `idx % 8 == 0`

  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_try_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
}

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to `true` if all the bits were already set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  bool local_already_xset;
  if (already_xset==NULL) { already_xset = &local_already_xset; }
  // if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
  // if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }

  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
}
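
A hedged usage sketch of the `already_xset` out-parameter; the helper name `claim_range` is illustrative, not part of mimalloc:

// Claim bits [idx, idx+n) and report whether this call actually changed them,
// e.g. to detect a double registration. Illustrative only.
static bool claim_range(mi_bitmap_t* bitmap, size_t idx, size_t n) {
  bool already_set = false;
  mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, n, &already_set);
  return !already_set;  // `true` if the bits were not all set before this call
}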

// Is a sequence of `n` bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
}

#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
  { size_t _set_idx; \
    size_t _start = start % MI_BFIELD_BITS; \
    mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
    while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
      decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;

#define mi_bitmap_forall_set_chunks_end() \
      _start += _set_idx+1;   /* so chunk_idx stays valid */ \
      _any_set >>= _set_idx;  /* skip scanned bits (and avoid UB with (idx+1)) */ \
      _any_set >>= 1; \
    } \
  }
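
The rotation is what turns `start` into a wrap-around search origin: after rotating `any_set` right by `_start`, bit 0 of the rotated word corresponds to chunk `_start`, bit 1 to chunk `_start+1` (mod `MI_BFIELD_BITS`), and so on, which is why a found index is mapped back as `(_set_idx + _start) % MI_BFIELD_BITS`. A sketch of what `mi_bfield_rotate_right` is assumed to do on a 64-bit field (illustrative only):

#include <stdint.h>

static inline uint64_t rotr64(uint64_t x, unsigned r) {
  return (x >> (r & 63)) | (x << ((64 - r) & 63));  // masking keeps the shifts well-defined, even for r == 0
}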

// Find a set bit in the bitmap and atomically clear it. Returns `true` on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` bits of `start` are used to set the start point of the search
// (to reduce thread contention).
bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
      return true;
    }
    else {
      // we may find that all bits are clear only on a second iteration but that is ok:
      // `any_set` is a conservative approximation.
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}
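
Putting the pieces together, a hedged end-to-end sketch; it is illustrative only and assumes the set paths also mark the corresponding chunk bits in `any_set` (handled elsewhere in this file):

static void drain_example(void) {
  static mi_bitmap_t bm;                             // static storage is zero-initialized
  mi_bitmap_init(&bm, true /* already_zero */);
  mi_bitmap_unsafe_xsetN(MI_BIT_SET, &bm, 40, 512);  // mark bits 40..551 as available
  size_t idx;
  while (mi_bitmap_try_find_and_clear(&bm, &idx, 0 /* start hint */)) {
    // each iteration atomically claims one previously-set bit: 40 <= idx < 552
  }
}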

// Find a byte in the bitmap with all bits set (0xFF) and atomically clear it to zero.
// Returns `true` on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx) {
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
      mi_assert_internal((*pidx % 8) == 0);
      return true;
    }
    else {
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}

// Find a sequence of `n` bits in the bitmap with all bits set, and atomically clear all of them.
// Returns `true` on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx) {
  // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
  // TODO: allow spanning across chunk boundaries
  if (n == 0 || n > MI_BFIELD_BITS) return false;
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
      return true;
    }
    else {
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}

@@ -1,94 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_XBITMAP_H
#define MI_XBITMAP_H

/* --------------------------------------------------------------------------------
  Definitions
-------------------------------------------------------------------------------- */

typedef size_t mi_bfield_t;

#define MI_BFIELD_BITS_SHIFT      (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS            (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE            (MI_BFIELD_BITS/8)
#define MI_BFIELD_BITS_MOD_MASK   (MI_BFIELD_BITS - 1)
#define MI_BFIELD_LO_BIT8         ((~((mi_bfield_t)0))/0xFF)  // 0x01010101 ..
#define MI_BFIELD_HI_BIT8         (MI_BFIELD_LO_BIT8 << 7)    // 0x80808080 ..

#define MI_BITMAP_CHUNK_BITS_SHIFT     (8)  // 2^8 = 256 bits per chunk
#define MI_BITMAP_CHUNK_BITS           (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
#define MI_BITMAP_CHUNK_FIELDS         (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK  (MI_BITMAP_CHUNK_BITS - 1)

typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
  _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;

typedef mi_decl_align(32) struct mi_bitmap_s {
  mi_bitmap_chunk_t    chunks[MI_BFIELD_BITS];
  _Atomic(mi_bfield_t) any_set;
} mi_bitmap_t;

#define MI_BITMAP_MAX_BITS  (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS)  // 16k bits on 64-bit, 8k bits on 32-bit

/* --------------------------------------------------------------------------------
  Bitmap
-------------------------------------------------------------------------------- */

typedef bool mi_bit_t;
#define MI_BIT_SET    (true)
#define MI_BIT_CLEAR  (false)

// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);

// Set/clear a sequence of `n` bits in the bitmap (which can cross chunks).
// Not atomic, so only use it while the bitmap is still local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to `true` if all the bits were already set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);

// Is a sequence of `n` bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
// and `false` otherwise, leaving the bitmap as is.
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0),
// and `false` otherwise, leaving the bitmap as is.
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's),
// and `false` otherwise, leaving the bitmap as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Find a set bit in the bitmap and atomically clear it. Returns `true` on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` bits of `start` are used to set the start point of the search
// (to reduce thread contention).
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);

// Find a byte in the bitmap with all bits set (0xFF) and atomically clear it to zero.
// Returns `true` on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx);

// Find a sequence of `n` bits in the bitmap with all bits set, and atomically clear all of them.
// Returns `true` on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx);

#endif // MI_XBITMAP_H