mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-10 17:29:31 +03:00)
wip: use epoch with 512bit chunks

commit 69ac69abac (parent 1d7a9f62a5)
8 changed files with 574 additions and 256 deletions

@@ -145,20 +145,13 @@ typedef int32_t mi_ssize_t;
 size_t _mi_clz_generic(size_t x);
 size_t _mi_ctz_generic(size_t x);
+uint32_t _mi_ctz_generic32(uint32_t x);

 static inline size_t mi_ctz(size_t x) {
-#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
+#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)  // on x64 tzcnt is defined for 0
   uint64_t r;
   __asm volatile ("tzcnt\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
   return r;
-#elif defined(__GNUC__) && MI_ARCH_ARM64
-  uint64_t r;
-  __asm volatile ("rbit\t%0, %1\n\tclz\t%0, %0" : "=&r"(r) : "r"(x) : "cc");
-  return r;
-#elif defined(__GNUC__) && MI_ARCH_RISCV
-  size_t r;
-  __asm volatile ("ctz\t%0, %1" : "=&r"(r) : "r"(x) : );
-  return r;
 #elif MI_ARCH_X64 && defined(__BMI1__)
   return (size_t)_tzcnt_u64(x);
 #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
@@ -168,6 +161,17 @@ static inline size_t mi_ctz(size_t x) {
 #else
   return (_BitScanForward64(&idx, x) ? (size_t)idx : 64);
 #endif
+/*
+// for arm64 and riscv, the builtin_ctz is defined for 0 as well
+#elif defined(__GNUC__) && MI_ARCH_ARM64
+  uint64_t r;
+  __asm volatile ("rbit\t%0, %1\n\tclz\t%0, %0" : "=&r"(r) : "r"(x) : "cc");
+  return r;
+#elif defined(__GNUC__) && MI_ARCH_RISCV
+  size_t r;
+  __asm volatile ("ctz\t%0, %1" : "=&r"(r) : "r"(x) : );
+  return r;
+*/
 #elif mi_has_builtin_size(ctz)
   return (x!=0 ? (size_t)mi_builtin_size(ctz)(x) : MI_SIZE_BITS);
 #else
@@ -177,18 +181,10 @@ static inline size_t mi_ctz(size_t x) {
 }

 static inline size_t mi_clz(size_t x) {
-#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
+#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)  // on x64 lzcnt is defined for 0
   uint64_t r;
   __asm volatile ("lzcnt\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
   return r;
-#elif defined(__GNUC__) && MI_ARCH_ARM64
-  uint64_t r;
-  __asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : "cc");
-  return r;
-#elif defined(__GNUC__) && MI_ARCH_RISCV
-  size_t r;
-  __asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : );
-  return r;
 #elif MI_ARCH_X64 && defined(__BMI1__)
   return (size_t)_lzcnt_u64(x);
 #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
@@ -198,6 +194,17 @@ static inline size_t mi_clz(size_t x) {
 #else
   return (_BitScanReverse64(&idx, x) ? 63 - (size_t)idx : 64);
 #endif
+/*
+// for arm64 and riscv, the builtin_clz is defined for 0 as well
+#elif defined(__GNUC__) && MI_ARCH_ARM64
+  uint64_t r;
+  __asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : "cc");
+  return r;
+#elif defined(__GNUC__) && MI_ARCH_RISCV
+  size_t r;
+  __asm volatile ("clz\t%0, %1" : "=&r"(r) : "r"(x) : );
+  return r;
+*/
 #elif mi_has_builtin_size(clz)
   return (x!=0 ? (size_t)mi_builtin_size(clz)(x) : MI_SIZE_BITS);
 #else
@@ -206,6 +213,26 @@ static inline size_t mi_clz(size_t x) {
 #endif
 }

+static inline uint32_t mi_ctz32(uint32_t x) {
+#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)  // on x64 tzcnt is defined for 0
+  uint32_t r;
+  __asm volatile ("tzcntl\t%1, %0" : "=&r"(r) : "r"(x) : "cc");
+  return r;
+#elif MI_ARCH_X64 && defined(__BMI1__)
+  return (uint32_t)_tzcnt_u32(x);
+#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  unsigned long idx;
+  return (_BitScanForward(&idx, x) ? (uint32_t)idx : 32);
+#elif mi_has_builtin(ctz) && (INT_MAX == INT32_MAX)
+  return (x!=0 ? (uint32_t)mi_builtin(ctz)(x) : 32);
+#elif mi_has_builtin(ctzl) && (LONG_MAX == INT32_MAX)
+  return (x!=0 ? (uint32_t)mi_builtin(ctzl)(x) : 32);
+#else
+  #define MI_HAS_FAST_BITSCAN 0
+  return _mi_ctz_generic32(x);
+#endif
+}
+
 #ifndef MI_HAS_FAST_BITSCAN
 #define MI_HAS_FAST_BITSCAN 1
 #endif
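Note that `_mi_ctz_generic32` is only declared in this diff; its definition is elsewhere in the commit. For reference, a portable 32-bit count-trailing-zeros is commonly written with a de Bruijn multiplication; a minimal sketch (not necessarily mimalloc's actual implementation):

```c
#include <stdint.h>

// De Bruijn-based ctz; returns 32 for x==0 (hypothetical fallback sketch).
static uint32_t ctz32_generic(uint32_t x) {
  static const uint8_t idx[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  };
  if (x == 0) return 32;
  // isolate the lowest set bit, then hash it to a table index
  return idx[((x & (0u - x)) * 0x077CB531u) >> 27];
}
```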
@@ -229,6 +256,22 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
 #endif
 }

+// Bit scan forward: find the least significant bit that is set (i.e. count trailing zeros).
+// Returns false if `x==0` (with `*idx` undefined), and true otherwise,
+// with `*idx` set to the bit index (`0 <= *idx < 32`).
+static inline bool mi_bsf32(uint32_t x, uint32_t* idx) {
+#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
+  // on x64 the carry flag is set on zero which gives better codegen
+  bool is_zero;
+  __asm ("tzcntl\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
+  return !is_zero;
+#else
+  *idx = mi_ctz32(x);
+  return (x!=0);
+#endif
+}
+
 // Bit scan reverse: find the most significant bit that is set.
 // Returns false if `x==0` (with `*idx` undefined), and true otherwise,
 // with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
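A typical caller pattern for the new `mi_bsf32` (a sketch using the names from this hunk): scan every set bit of a 32-bit mask, clearing the lowest set bit each round.

```c
#include <stdint.h>

// Sketch: visit every set bit of a 32-bit mask with mi_bsf32.
static void visit_set_bits(uint32_t mask) {
  uint32_t idx;
  while (mi_bsf32(mask, &idx)) {
    // ... process bit `idx` ...
    mask &= (mask - 1);  // clear the least significant set bit and continue
  }
}
```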
@@ -248,29 +291,6 @@ static inline bool mi_bsr(size_t x, size_t* idx) {
 }

-
-/* --------------------------------------------------------------------------------
-  find least/most significant bit position
--------------------------------------------------------------------------------- */
-
-// Find most significant bit index, or MI_SIZE_BITS if 0
-static inline size_t mi_find_msb(size_t x) {
-#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  unsigned long i;
-  #if MI_SIZE_BITS==32
-    return (_BitScanReverse(&i, x) ? i : 32);
-  #else
-    return (_BitScanReverse64(&i, x) ? i : 64);
-  #endif
-#else
-  return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
-#endif
-}
-
-// Find least significant bit index, or MI_SIZE_BITS if 0 (this equals `mi_ctz`, count trailing zeros)
-static inline size_t mi_find_lsb(size_t x) {
-  return mi_ctz(x);
-}
-

 /* --------------------------------------------------------------------------------
   rotate
@@ -288,13 +308,26 @@ static inline size_t mi_rotr(size_t x, size_t r) {
   return _rotr64(x,(int)r);
 #endif
 #else
-  // The term `(-rshift)&(MI_BFIELD_BITS-1)` is written instead of `MI_BFIELD_BITS - rshift` to
+  // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
   // avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
   const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
   return ((x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1))));
 #endif
 }

+static inline uint32_t mi_rotr32(uint32_t x, uint32_t r) {
+#if mi_has_builtin(rotateright32)
+  return mi_builtin(rotateright32)(x, r);
+#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  return _lrotr(x, (int)r);
+#else
+  // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
+  // avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
+  const unsigned int rshift = (unsigned int)(r) & 31;
+  return ((x >> rshift) | (x << ((-rshift) & 31)));
+#endif
+}
+
 static inline size_t mi_rotl(size_t x, size_t r) {
 #if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64)
   return mi_builtin(rotateleft64)(x,r);
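To see why the masked form avoids undefined behavior: with the naive `x << (32 - r)` and `r==0`, a 32-bit value is shifted by its full width, which is UB in C. With the mask, both shift amounts become 0 and the result is simply `x`. A small check, assuming the `mi_rotr32` from this hunk:

```c
#include <assert.h>
#include <stdint.h>

void rotr32_examples(void) {
  uint32_t x = 0x12345678u;
  assert(mi_rotr32(x, 0)  == x);            // no UB: both shifts are by 0
  assert(mi_rotr32(x, 4)  == 0x81234567u);  // low nibble wraps around to the top
  assert(mi_rotr32(x, 36) == 0x81234567u);  // rotation count is taken mod 32
}
```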
@@ -307,7 +340,7 @@ static inline size_t mi_rotl(size_t x, size_t r) {
   return _rotl64(x,(int)r);
 #endif
 #else
-  // The term `(-rshift)&(MI_BFIELD_BITS-1)` is written instead of `MI_BFIELD_BITS - rshift` to
+  // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
   // avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
   const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
   return ((x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1))));
@@ -120,7 +120,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 #endif
 #ifndef MI_BITMAP_CHUNK_BITS_SHIFT
-#define MI_BITMAP_CHUNK_BITS_SHIFT  8                   // optimized for 256 bits per chunk (avx2)
+#define MI_BITMAP_CHUNK_BITS_SHIFT  (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512)
 #endif

 #define MI_BITMAP_CHUNK_BITS  (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
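The new shift is derived from the word size: `MI_SIZE_SHIFT` is 3 on a 64-bit target (`sizeof(size_t)==8`), so the chunk shift becomes 6+3 = 9 and a chunk holds 1<<9 = 512 bits (eight 64-bit bfields); a 32-bit target gets 6+2 = 8, i.e. 256 bits per chunk. A hypothetical standalone check of that arithmetic (names prefixed `MY_` to mark them as illustration, not the real macros):

```c
// Assumes a 64-bit target where MI_SIZE_SHIFT would be 3.
#define MY_SIZE_SHIFT               3
#define MY_BITMAP_CHUNK_BITS_SHIFT  (6 + MY_SIZE_SHIFT)
#define MY_BITMAP_CHUNK_BITS        (1 << MY_BITMAP_CHUNK_BITS_SHIFT)

_Static_assert(MY_BITMAP_CHUNK_BITS == 512, "512-bit chunks on 64-bit platforms");
```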

src/arena.c (59 changes)
@@ -197,7 +197,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(

   // set the dirty bits
   if (arena->memid.initially_zero) {
-    memid->initially_zero = mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_dirty, slice_index, slice_count, NULL);
+    memid->initially_zero = mi_bitmap_setN(&arena->slices_dirty, slice_index, slice_count, NULL);
   }

   // set commit state
@@ -206,7 +206,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
     memid->initially_committed = true;

     bool all_already_committed;
-    mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slice_count, &all_already_committed);
+    mi_bitmap_setN(&arena->slices_committed, slice_index, slice_count, &all_already_committed);
     if (!all_already_committed) {
       bool commit_zero = false;
       if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero, NULL)) {
@@ -219,13 +219,13 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
   }
   else {
     // no need to commit, but check if already fully committed
-    memid->initially_committed = mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slice_count);
+    memid->initially_committed = mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count);
   }

-  mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->slices_free, slice_index, slice_count));
-  if (commit) { mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slice_count)); }
-  mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_dirty, slice_index, slice_count));
-  // mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->slices_purge, slice_index, slice_count));
+  mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
+  if (commit) { mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count)); }
+  mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count));
+  // mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));

   return p;
 }
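These call sites replace the explicit `MI_BIT_SET`/`MI_BIT_CLEAR` argument with direction-specific helpers. The chunk-level versions appear later in this diff (in src/bitmap.c); the bitmap-level ones are presumably the same one-line pattern, roughly:

```c
// Sketch of the presumed bitmap-level wrappers over the generic xset API
// (only the chunk-level equivalents are visible in this diff):
static inline bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_set) {
  return mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, n, all_already_set);
}
static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
  return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
}
```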
@@ -455,10 +455,10 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
   // found an abandoned page of the right size
   mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
   mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
-  mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->slices_free, slice_index, slice_count));
-  mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slice_count));
-  mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_dirty, slice_index, slice_count));
-  mi_assert_internal(mi_bitmap_is_xsetN(MI_BIT_CLEAR, &arena->slices_purge, slice_index, slice_count));
+  mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
+  mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
+  mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count));
+  mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
   mi_assert_internal(mi_page_block_size(page) == block_size);
   mi_assert_internal(!mi_page_is_full(page));
   mi_assert_internal(mi_page_is_abandoned(page));
@@ -626,7 +626,7 @@ void _mi_arena_page_abandon(mi_page_t* page, mi_tld_t* tld) {
     size_t bin = _mi_bin(mi_page_block_size(page));
     size_t slice_index;
     mi_arena_t* arena = mi_page_arena(page, &slice_index, NULL);
-    bool were_zero = mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_abandoned[bin], slice_index, 1, NULL);
+    bool were_zero = mi_bitmap_setN(&arena->slices_abandoned[bin], slice_index, 1, NULL);
     MI_UNUSED(were_zero); mi_assert_internal(were_zero);
     mi_atomic_increment_relaxed(&tld->subproc->abandoned_count[bin]);
   }
@@ -660,7 +660,7 @@ bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
     //   return false;
     // }
     const size_t bin = _mi_bin(page->block_size);
-    if (mi_bitmap_try_xsetN(MI_BIT_CLEAR, &arena->slices_abandoned[bin], slice_index, 1)) {
+    if (mi_bitmap_try_clear(&arena->slices_abandoned[bin], slice_index)) {
       // we got it atomically
       _mi_page_reclaim(heap, page);
       mi_assert_internal(!mi_page_is_abandoned(page));
@@ -668,7 +668,7 @@ bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
     }
     else {
       if (mi_page_is_abandoned(page)) {
-        mi_assert(false);
+        // mi_assert(false);
       }
     }
   }
@@ -748,7 +748,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     else {
       if (!all_committed) {
         // mark the entire range as no longer committed (so we recommit the full range when re-using)
-        mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->slices_committed, slice_index, slice_count, NULL);
+        mi_bitmap_clearN(&arena->slices_committed, slice_index, slice_count);
         mi_track_mem_noaccess(p, size);
         if (committed_size > 0) {
           // if partially committed, adjust the committed stats (as it will be recommitted when re-using)
@@ -764,7 +764,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     }

     // and make it available to others again
-    bool all_inuse = mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_free, slice_index, slice_count, NULL);
+    bool all_inuse = mi_bitmap_setN(&arena->slices_free, slice_index, slice_count, NULL);
     if (!all_inuse) {
       _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", mi_arena_slice_start(arena,slice_index), mi_size_of_slices(slice_count));
       return;
@@ -906,14 +906,14 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   }

   // reserve our meta info (and reserve slices outside the memory area)
-  mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->slices_free, info_slices /* start */, arena->slice_count - info_slices);
+  mi_bitmap_unsafe_setN(&arena->slices_free, info_slices /* start */, arena->slice_count - info_slices);
   if (memid.initially_committed) {
-    mi_bitmap_unsafe_xsetN(MI_BIT_SET, &arena->slices_committed, 0, arena->slice_count);
+    mi_bitmap_unsafe_setN(&arena->slices_committed, 0, arena->slice_count);
   }
   else {
-    mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_committed, 0, info_slices, NULL);
+    mi_bitmap_setN(&arena->slices_committed, 0, info_slices, NULL);
   }
-  mi_bitmap_xsetN(MI_BIT_SET, &arena->slices_dirty, 0, info_slices, NULL);
+  mi_bitmap_setN(&arena->slices_dirty, 0, info_slices, NULL);

   return mi_arena_add(arena, arena_id, &_mi_stats_main);
 }
@@ -973,10 +973,16 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
   _mi_output_message("%s%s:\n", prefix, header);
   size_t bit_count = 0;
   size_t bit_set_count = 0;
-  for (int i = 0; i < MI_BFIELD_BITS && bit_count < slice_count; i++) {
-    char buf[MI_BITMAP_CHUNK_BITS + 32]; _mi_memzero(buf, sizeof(buf));
+  for (int i = 0; i < MI_BITMAP_CHUNK_COUNT && bit_count < slice_count; i++) {
+    char buf[MI_BITMAP_CHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf));
     mi_bitmap_chunk_t* chunk = &bitmap->chunks[i];
     for (size_t j = 0, k = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) {
+      if (j > 0 && (j % 4) == 0) {
+        buf[k++] = '\n';
+        _mi_memcpy(buf+k, prefix, strlen(prefix)); k += strlen(prefix);
+        buf[k++] = ' ';
+        buf[k++] = ' ';
+      }
       if (bit_count < slice_count) {
         mi_bfield_t bfield = chunk->bfields[j];
         if (invert) bfield = ~bfield;
@@ -987,12 +993,11 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
         buf[k++] = ' ';
       }
       else {
-        _mi_memset(buf + k, ' ', MI_BFIELD_BITS);
+        _mi_memset(buf + k, 'o', MI_BFIELD_BITS);
         k += MI_BFIELD_BITS;
       }
       bit_count += MI_BFIELD_BITS;
     }
-
     _mi_output_message("%s %s\n", prefix, buf);
   }
   _mi_output_message("%s total ('x'): %zu\n", prefix, bit_set_count);
@@ -1113,7 +1118,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices,
   const size_t size = mi_size_of_slices(slices);
   void* const p = mi_arena_slice_start(arena, slice_index);
   bool needs_recommit;
-  if (mi_bitmap_is_xsetN(MI_BIT_SET, &arena->slices_committed, slice_index, slices)) {
+  if (mi_bitmap_is_setN(&arena->slices_committed, slice_index, slices)) {
     // all slices are committed, we can purge freely
     needs_recommit = _mi_os_purge(p, size, stats);
   }
@@ -1128,11 +1133,11 @@ static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices,
   }

   // clear the purged slices
-  mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->slices_purge, slices, slice_index, NULL);
+  mi_bitmap_clearN(&arena->slices_purge, slices, slice_index);

   // update committed bitmap
   if (needs_recommit) {
-    mi_bitmap_xsetN(MI_BIT_CLEAR, &arena->slices_committed, slices, slice_index, NULL);
+    mi_bitmap_clearN(&arena->slices_committed, slices, slice_index);
   }
 }

src/bitmap.c (543 changes)
@@ -44,85 +44,168 @@ static inline bool mi_bfield_find_least_to_xset(mi_bit_t set, mi_bfield_t x, siz
   return mi_bfield_find_least_bit((set ? ~x : x), idx);
 }

-// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
-static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
-  mi_assert_internal(idx < MI_BFIELD_BITS);
-  const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
-  if (set) {
-    const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
-    return ((old&mask) == 0);
-  }
-  else {
-    const mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
-    return ((old&mask) == mask);
-  }
-}
+// Set a bit atomically. Returns `true` if the bit transitioned from 0 to 1.
+static inline bool mi_bfield_atomic_set(_Atomic(mi_bfield_t)*b, size_t idx) {
+  mi_assert_internal(idx < MI_BFIELD_BITS);
+  const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
+  const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
+  return ((old&mask) == 0);
+}
+
+// Clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0.
+static inline bool mi_bfield_atomic_clear(_Atomic(mi_bfield_t)*b, size_t idx) {
+  mi_assert_internal(idx < MI_BFIELD_BITS);
+  const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
+  mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
+  return ((old&mask) == mask);
+}
+
+// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
+static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
+  if (set) {
+    return mi_bfield_atomic_set(b, idx);
+  }
+  else {
+    return mi_bfield_atomic_clear(b, idx);
+  }
+}
+
+// Set a mask set of bits atomically, and return true if the mask bits transitioned from all 0's to 1's.
+static inline bool mi_bfield_atomic_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_set) {
+  mi_assert_internal(mask != 0);
+  mi_bfield_t old = mi_atomic_load_relaxed(b);
+  while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask));  // try to atomically set the mask bits until success
+  if (already_set!=NULL) { *already_set = ((old&mask)==mask); }
+  return ((old&mask) == 0);
+}
+
+// Clear a mask set of bits atomically, and return true if the mask bits transitioned from all 1's to 0's.
+static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_clear) {
+  mi_assert_internal(mask != 0);
+  mi_bfield_t old = mi_atomic_load_relaxed(b);
+  while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask));  // try to atomically clear the mask bits until success
+  if (already_clear!=NULL) { *already_clear = ((old&mask)==0); }
+  return ((old&mask) == mask);
+}

-// Set/clear a mask set of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's)
+// Set/clear a mask set of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's).
 // `already_xset` is true if all bits for the mask were already set/cleared.
-static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
+static inline bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
   mi_assert_internal(mask != 0);
   if (set) {
-    mi_bfield_t old = *b;
-    while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask));  // try to atomically set the mask bits until success
-    *already_xset = ((old&mask) == mask);
-    return ((old&mask) == 0);
+    return mi_bfield_atomic_set_mask(b, mask, already_xset);
   }
-  else { // clear
-    mi_bfield_t old = *b;
-    while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask));  // try to atomically clear the mask bits until success
-    *already_xset = ((old&mask) == 0);
-    return ((old&mask) == mask);
+  else {
+    return mi_bfield_atomic_clear_mask(b, mask, already_xset);
   }
 }

+// Tries to set a bit atomically. Returns `true` if the bit transitioned from 0 to 1,
+// and false otherwise (leaving the bit unchanged).
+static inline bool mi_bfield_atomic_try_set(_Atomic(mi_bfield_t)*b, size_t idx) {
+  mi_assert_internal(idx < MI_BFIELD_BITS);
+  return mi_bfield_atomic_set(b, idx);  // for a single bit there is no difference
+}
+
+// Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0.
+static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx) {
+  mi_assert_internal(idx < MI_BFIELD_BITS);
+  return mi_bfield_atomic_clear(b, idx);
+}
+
 // Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
-static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
+static inline bool mi_bfield_atomic_try_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
   mi_assert_internal(idx < MI_BFIELD_BITS);
   // for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first
   return mi_bfield_atomic_xset(set, b, idx);
 }

-// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
-// and false otherwise (leaving the bit field as is).
-static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
-  mi_assert_internal(mask != 0);
-  if (set) {
-    mi_bfield_t old = *b;
-    do {
-      if ((old&mask) != 0) return false; // the mask bits are no longer 0
-    } while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask));  // try to atomically set the mask bits
-    return true;
-  }
-  else { // clear
-    mi_bfield_t old = *b;
-    do {
-      if ((old&mask) != mask) return false; // the mask bits are no longer set
-    } while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask));  // try to atomically clear the mask bits
-    return true;
-  }
-}
+// Tries to set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask,
+// and false otherwise (leaving the bit field as is).
+static inline bool mi_bfield_atomic_try_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
+  mi_assert_internal(mask != 0);
+  mi_bfield_t old = mi_atomic_load_relaxed(b);
+  do {
+    if ((old&mask) != 0) return false; // the mask bits are no longer 0
+  } while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask));  // try to atomically set the mask bits
+  return true;
+}
+
+// Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0,
+// and false otherwise (leaving the bit field as is).
+static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
+  mi_assert_internal(mask != 0);
+  mi_bfield_t old = mi_atomic_load_relaxed(b);
+  do {
+    if ((old&mask) != mask) return false; // the mask bits are no longer set
+  } while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask));  // try to atomically clear the mask bits
+  return true;
+}
+
+// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0),
+// and false otherwise (leaving the bit field as is).
+static inline bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
+  mi_assert_internal(mask != 0);
+  if (set) {
+    return mi_bfield_atomic_try_set_mask(b, mask);
+  }
+  else {
+    return mi_bfield_atomic_try_clear_mask(b, mask);
+  }
+}
+
+// Tries to set a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF,
+// and false otherwise (leaving the bit field as is).
+static inline bool mi_bfield_atomic_try_set8(_Atomic(mi_bfield_t)*b, size_t byte_idx) {
+  mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
+  const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
+  return mi_bfield_atomic_try_set_mask(b, mask);
+}
+
+// Tries to clear a byte atomically, and returns true if the byte atomically transitioned from 0xFF to 0.
+static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t byte_idx) {
+  mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
+  const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
+  return mi_bfield_atomic_try_clear_mask(b, mask);
+}

 // Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
 // and false otherwise (leaving the bit field as is).
-static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t byte_idx) {
+static inline bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t byte_idx) {
   mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
   const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
   return mi_bfield_atomic_try_xset_mask(set, b, mask);
 }

+// Check if all bits corresponding to a mask are set.
+static inline bool mi_bfield_atomic_is_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
+  mi_assert_internal(mask != 0);
+  return ((*b & mask) == mask);
+}
+
+// Check if all bits corresponding to a mask are clear.
+static inline bool mi_bfield_atomic_is_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
+  mi_assert_internal(mask != 0);
+  return ((*b & mask) == 0);
+}
+
 // Check if all bits corresponding to a mask are set/cleared.
-static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
+static inline bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
   mi_assert_internal(mask != 0);
   if (set) {
-    return ((*b & mask) == mask);
+    return mi_bfield_atomic_is_set_mask(b, mask);
   }
   else {
-    return ((*b & mask) == 0);
+    return mi_bfield_atomic_is_clear_mask(b, mask);
   }
 }

 // Check if a bit is set/clear
 // static inline bool mi_bfield_atomic_is_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
 //   mi_assert_internal(idx < MI_BFIELD_BITS);
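The transition tests above work because `fetch_or`/`fetch_and` return the value *before* the update, so a thread can tell whether it was the one that actually flipped the bit. A minimal model in standard C11 atomics:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Returns true only for the thread whose fetch_or flipped bit 5 from 0 to 1.
static bool set_bit5(_Atomic uint64_t* field) {
  const uint64_t mask = UINT64_C(1) << 5;
  uint64_t old = atomic_fetch_or_explicit(field, mask, memory_order_acq_rel);
  return (old & mask) == 0;  // bit was 0 before the or => we caused the 0 -> 1 transition
}
```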
@@ -135,22 +218,9 @@ static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b,
   bitmap chunks
 -------------------------------------------------------------------------------- */

-static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
-  mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
-  const size_t i = cidx / MI_BFIELD_BITS;
-  const size_t idx = cidx % MI_BFIELD_BITS;
-  return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
-}
-
-static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
-  mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
-  const size_t i = byte_idx / MI_BFIELD_SIZE;
-  const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
-  return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
-}
-
-// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
-static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
+// Set/clear a sequence of `n` bits within a chunk.
+// Returns true if all bits transitioned from 0 to 1 (or 1 to 0).
+static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* pall_already_xset) {
   mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
   mi_assert_internal(n>0);
   bool all_transition = true;
@@ -164,17 +234,28 @@ static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t
     mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
     const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
     bool already_xset = false;
     all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
     all_already_xset = all_already_xset && already_xset;
     // next field
     field++;
     idx = 0;
     n -= m;
   }
-  *palready_xset = all_already_xset;
+  if (pall_already_xset!=NULL) { *pall_already_xset = all_already_xset; }
   return all_transition;
 }

+static inline bool mi_bitmap_chunk_setN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* all_already_set) {
+  return mi_bitmap_chunk_xsetN(MI_BIT_SET, chunk, cidx, n, all_already_set);
+}
+
+static inline bool mi_bitmap_chunk_clearN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* all_already_clear) {
+  return mi_bitmap_chunk_xsetN(MI_BIT_CLEAR, chunk, cidx, n, all_already_clear);
+}
+
 // Check if a sequence of `n` bits within a chunk are all set/cleared.
 static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
   mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
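The mask expression above selects `m` bits starting at `idx` within one bfield, with the `m == MI_BFIELD_BITS` special case avoiding an undefined full-width shift. A standalone check of the arithmetic (an illustration, not part of the commit):

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const unsigned m = 5, idx = 3;
  // same construction as in mi_bitmap_chunk_xsetN, for a 64-bit bfield
  const uint64_t mask = (m == 64 ? ~UINT64_C(0) : ((UINT64_C(1) << m) - 1) << idx);
  printf("%#llx\n", (unsigned long long)mask);  // prints 0xf8: bits 3..7 selected
  return 0;
}
```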
@@ -197,6 +278,38 @@ static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, siz
   return all_xset;
 }

+static inline bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx) {
+  mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
+  const size_t i = cidx / MI_BFIELD_BITS;
+  const size_t idx = cidx % MI_BFIELD_BITS;
+  return mi_bfield_atomic_try_xset(set, &chunk->bfields[i], idx);
+}
+
+static inline bool mi_bitmap_chunk_try_set(mi_bitmap_chunk_t* chunk, size_t cidx) {
+  return mi_bitmap_chunk_try_xset(MI_BIT_SET, chunk, cidx);
+}
+
+static inline bool mi_bitmap_chunk_try_clear(mi_bitmap_chunk_t* chunk, size_t cidx) {
+  return mi_bitmap_chunk_try_xset(MI_BIT_CLEAR, chunk, cidx);
+}
+
+static inline bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx) {
+  mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
+  const size_t i = byte_idx / MI_BFIELD_SIZE;
+  const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
+  return mi_bfield_atomic_try_xset8(set, &chunk->bfields[i], ibyte_idx);
+}
+
+static inline bool mi_bitmap_chunk_try_set8(mi_bitmap_chunk_t* chunk, size_t byte_idx) {
+  return mi_bitmap_chunk_try_xset8(MI_BIT_SET, chunk, byte_idx);
+}
+
+static inline bool mi_bitmap_chunk_try_clear8(mi_bitmap_chunk_t* chunk, size_t byte_idx) {
+  return mi_bitmap_chunk_try_xset8(MI_BIT_CLEAR, chunk, byte_idx);
+}
+
 // Try to atomically set/clear a sequence of `n` bits within a chunk.
 // Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
 // and false otherwise leaving all bit fields as is.
@@ -252,12 +365,19 @@ restore:
   while( field > start_field) {
     field--;
     const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
-    bool already_xset;
-    mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
+    mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, NULL);
   }
   return false;
 }

+static inline bool mi_bitmap_chunk_try_setN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
+  return mi_bitmap_chunk_try_xsetN(MI_BIT_SET, chunk, cidx, n);
+}
+
+static inline bool mi_bitmap_chunk_try_clearN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
+  return mi_bitmap_chunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n);
+}
+
 // find least 0/1-bit in a chunk and try to set/clear it atomically
 // set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
@@ -283,6 +403,46 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu
     }
     // try again
   }
+#elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512)
+  while (true) {
+    size_t chunk_idx = 0;
+  #if 1
+    __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
+    if ((set ? _mm256_test_all_ones(vec) : _mm256_testz_si256(vec,vec))) {
+      chunk_idx += 4;
+      vec = _mm256_load_si256(((const __m256i*)chunk->bfields) + 1);
+    }
+    const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256())); // (elem64 == ~0 / 0 ? 0xFF : 0)
+    const uint32_t mask = ~_mm256_movemask_epi8(vcmp);  // mask of most significant bit of each byte (so each 8 bits are all set or clear)
+    // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared)
+    if (mask==0) return false;
+    mi_assert_internal((_tzcnt_u32(mask)%8) == 0); // tzcnt == 0, 8, 16, or 24
+    chunk_idx += _tzcnt_u32(mask) / 8;
+  #else
+    const __m256i vec1  = _mm256_load_si256((const __m256i*)chunk->bfields);
+    const __m256i vec2  = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
+    const __m256i cmpv  = (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256());
+    const __m256i vcmp1 = _mm256_cmpeq_epi64(vec1, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0)
+    const __m256i vcmp2 = _mm256_cmpeq_epi64(vec2, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0)
+    const uint32_t mask1 = ~_mm256_movemask_epi8(vcmp1);  // mask of most significant bit of each byte (so each 8 bits are all set or clear)
+    const uint32_t mask2 = ~_mm256_movemask_epi8(vcmp2);  // (fixed: was vcmp1 twice)
+    const uint64_t mask = ((uint64_t)mask2 << 32) | mask1;
+    // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared)
+    if (mask==0) return false;
+    mi_assert_internal((_tzcnt_u64(mask)%8) == 0); // tzcnt == 0, 8, 16, 24, ..
+    const size_t chunk_idx = _tzcnt_u64(mask) / 8;
+  #endif
+    mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
+    size_t cidx;
+    if (mi_bfield_find_least_to_xset(set, chunk->bfields[chunk_idx], &cidx)) {          // find the bit-idx that is set/clear
+      if mi_likely(mi_bfield_atomic_try_xset(set, &chunk->bfields[chunk_idx], cidx)) {  // set/clear it atomically
+        *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
+        mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
+        return true;
+      }
+    }
+    // try again
+  }
 #else
   for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
     size_t idx;
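The AVX2 probe only narrows the search down to a candidate 64-bit bfield; the per-bit work still happens in the scalar code that follows. A scalar sketch of what the compare/movemask sequence computes for the clear case (searching for a set bit) in a 512-bit chunk:

```c
#include <stdint.h>

// Scalar equivalent of the AVX2 candidate search (clear case): the index of the
// first bfield that still contains a 1-bit, or -1 when the chunk is all zero
// (the `mask==0` early-out above).
static int first_candidate_bfield(const uint64_t bfields[8]) {
  for (int i = 0; i < 8; i++) {
    if (bfields[i] != 0) return i;
  }
  return -1;
}
```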
@@ -302,49 +462,10 @@ static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk,
   return mi_bitmap_chunk_find_and_try_xset(MI_BIT_CLEAR, chunk, pidx);
 }

-// static inline bool mi_bitmap_chunk_find_and_try_set(mi_bitmap_chunk_t* chunk, size_t* pidx) {
-//   return mi_bitmap_chunk_find_and_try_xset(MI_BIT_SET, chunk, pidx);
-// }
-
-/*
-// find least 1-bit in a chunk and try unset it atomically
-// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
-// todo: try neon version
-static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
-  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
-  while(true) {
-    const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
-    if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
-    const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
-    const uint32_t mask = ~_mm256_movemask_epi8(vcmp);  // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
-    mi_assert_internal(mask != 0);
-    const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
-    mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
-    size_t cidx;
-    if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
-      if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
-        *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
-        mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
-        return true;
-      }
-    }
-    // try again
-  }
-  #else
-  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
-    size_t idx;
-    if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
-      if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
-        *pidx = (i*MI_BFIELD_BITS + idx);
-        mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
-        return true;
-      }
-    }
-  }
-  return false;
-  #endif
-}
-*/
+static inline bool mi_bitmap_chunk_find_and_try_set(mi_bitmap_chunk_t* chunk, size_t* pidx) {
+  return mi_bitmap_chunk_find_and_try_xset(MI_BIT_SET, chunk, pidx);
+}

 // find least byte in a chunk with all bits set, and try unset it atomically
 // set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
@@ -392,7 +513,8 @@ static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk,
 }

-// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically
+// find a sequence of `n` bits in a chunk with all `n` (`< MI_BFIELD_BITS`!) bits set,
+// and try unset it atomically
 // set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
 // todo: try avx2 and neon version
 // todo: allow spanning across bfield boundaries?
@@ -410,7 +532,7 @@ static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk,

       if ((b&mask) == mask) { // found a match
         mi_assert_internal( ((mask << bshift) >> bshift) == mask );
-        if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
+        if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i],mask<<bshift)) {
           *pidx = (i*MI_BFIELD_BITS) + bshift;
           mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
           mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
@@ -450,33 +572,90 @@ static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk,
 // }

 // are all bits in a bitmap chunk clear?
-static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
+static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
 #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
   const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
   return _mm256_testz_si256( vec, vec );
+#elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512)
+  const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
+  if (!_mm256_testz_si256(vec1, vec1)) return false;
+  const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
+  return (_mm256_testz_si256(vec2, vec2));
 #else
-  // written like this for vectorization
-  mi_bfield_t x = chunk->bfields[0];
-  for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
-    x = x | chunk->bfields[i];
+  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+    if (chunk->bfields[i] != 0) return false;
   }
-  return (x == 0);
+  return true;
 #endif
 }

+/* --------------------------------------------------------------------------------
+  epochset (for now for 32-bit sets only)
+-------------------------------------------------------------------------------- */
+
+static void mi_epochset_split(mi_epochset_t es, uint32_t* bset, size_t* epoch) {
+  *bset  = (uint32_t)es;
+  *epoch = (size_t)(es >> 32);
+}
+
+static mi_epochset_t mi_epochset_join(uint32_t bset, size_t epoch) {
+  return ((uint64_t)epoch << 32) | bset;
+}
+
+// setting a bit increases the epoch
+static void mi_epochset_set(_Atomic(mi_epochset_t)*es, size_t idx) {
+  mi_assert(idx < 32);
+  size_t   epoch;
+  uint32_t bset;
+  mi_epochset_t es_new;
+  mi_epochset_t es_old = mi_atomic_load_relaxed(es);
+  do {
+    mi_epochset_split(es_old, &bset, &epoch);
+    es_new = mi_epochset_join(bset | (MI_ZU(1)<<idx), epoch+1);
+  } while (!mi_atomic_cas_weak_acq_rel(es, &es_old, es_new));
+}
+
+// clearing a bit only succeeds if the epoch didn't change (so we never clear a bit that was set concurrently)
+static bool mi_epochset_try_clear(_Atomic(mi_epochset_t)*es, size_t idx, size_t expected_epoch) {
+  mi_assert(idx < MI_EPOCHSET_BITS);
+  size_t   epoch;
+  uint32_t bset;
+  mi_epochset_t es_new;
+  mi_epochset_t es_old = mi_atomic_load_relaxed(es);
+  do {
+    mi_epochset_split(es_old, &bset, &epoch);
+    if (epoch != expected_epoch) return false;
+    es_new = mi_epochset_join(bset & ~(MI_ZU(1)<<idx), epoch);  // no need to increase the epoch for clearing
+  } while (!mi_atomic_cas_weak_acq_rel(es, &es_old, es_new));
+  return true;
+}
+
+static void mi_bitmap_anyset_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
+  mi_assert(chunk_idx < MI_BITMAP_CHUNK_COUNT);
+  mi_epochset_set(&bitmap->any_set, chunk_idx);
+}
+
+static bool mi_bitmap_anyset_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx, size_t epoch) {
+  mi_assert(chunk_idx < MI_BITMAP_CHUNK_COUNT);
+  return mi_epochset_try_clear(&bitmap->any_set, chunk_idx, epoch);
+}
+
+static uint32_t mi_bitmap_anyset(mi_bitmap_t* bitmap, size_t* epoch) {
+  uint32_t bset;
+  mi_epochset_split(mi_atomic_load_relaxed(&bitmap->any_set), &bset, epoch);
+  return bset;
+}
+
+static size_t mi_bitmap_epoch(mi_bitmap_t* bitmap) {
+  size_t   epoch;
+  uint32_t bset;
+  mi_epochset_split(mi_atomic_load_relaxed(&bitmap->any_set), &bset, &epoch);
+  return epoch;
+}
+
 /* --------------------------------------------------------------------------------
   bitmap
 -------------------------------------------------------------------------------- */
-static void mi_bitmap_update_anyset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
-  if (set) {
-    mi_bfield_atomic_xset(MI_BIT_SET, &bitmap->any_set, idx);
-  }
-  else { // clear
-    if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[idx])) {
-      mi_bfield_atomic_xset(MI_BIT_CLEAR, &bitmap->any_set, idx);
-    }
-  }
-}

 // initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
 void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
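The epoch guards against a lost-set race, an instance of the ABA problem: between the moment a thread decides a chunk is empty and the moment it clears that chunk's `any_set` bit, another thread may set a bit in the chunk. Since every set bumps the 32-bit epoch packed next to the 32-bit set in the same 64-bit word, the clearing CAS then fails and the chunk flag survives. Roughly the interleaving this prevents (names from this diff, hypothetical thread timeline in the comments):

```c
// Thread A (retiring a chunk flag):            // Thread B (publishing a bit):
size_t epoch = mi_bitmap_epoch(bitmap);
// ...A observes chunk i all-clear...           mi_bitmap_anyset_set(bitmap, i); // epoch+1
mi_bitmap_anyset_try_clear(bitmap, i, epoch);
// -> returns false: the epoch moved on, so B's freshly flagged chunk is NOT unflagged
```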
@@ -485,8 +664,8 @@ void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
   }
 }

-// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
-void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
+// Set a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
+void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
   mi_assert_internal(n>0);
   mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);
@@ -495,19 +674,18 @@ void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
   size_t m = MI_BITMAP_CHUNK_BITS - cidx;
   if (m > n) { m = n; }
-  bool already_xset;
-  mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);
-  mi_bitmap_update_anyset(set, bitmap, chunk_idx);
+  mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, m, NULL);
+  mi_bitmap_anyset_set(bitmap, chunk_idx);

   // n can be large so use memset for efficiency for all in-between chunks
   chunk_idx++;
   n -= m;
   const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
   if (mid_chunks > 0) {
-    _mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), mid_chunks * (MI_BITMAP_CHUNK_BITS/8));
+    _mi_memset(&bitmap->chunks[chunk_idx], ~0, mid_chunks * (MI_BITMAP_CHUNK_BITS/8));
     const size_t end_chunk = chunk_idx + mid_chunks;
     while (chunk_idx < end_chunk) {
-      mi_bitmap_update_anyset(set, bitmap, chunk_idx);
+      mi_bitmap_anyset_set(bitmap, chunk_idx);
       chunk_idx++;
     }
     n -= (mid_chunks * MI_BITMAP_CHUNK_BITS);
@@ -517,8 +695,8 @@ void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_
   if (n > 0) {
     mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
     mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
-    mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
-    mi_bitmap_update_anyset(set, bitmap, chunk_idx);
+    mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], 0, n, NULL);
+    mi_bitmap_anyset_set(bitmap, chunk_idx);
   }
 }
@@ -529,11 +707,25 @@ bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
   mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
-  bool ok = mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
-  if (ok) { mi_bitmap_update_anyset(set, bitmap, chunk_idx); }
-  return ok;
+  if (set) {
+    // first set the anyset since it is a conservative approximation (increases epoch)
+    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    // then actually try to set it atomically
+    return mi_bitmap_chunk_try_set(&bitmap->chunks[chunk_idx], cidx);
+  }
+  else {
+    const size_t epoch = mi_bitmap_epoch(bitmap);
+    bool cleared = mi_bitmap_chunk_try_clear(&bitmap->chunks[chunk_idx], cidx);
+    if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    }
+    return cleared;
+  }
 }

 // Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
 // and false otherwise leaving the bitmask as is.
 bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
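Note the asymmetry in this protocol, which the byte- and N-bit variants below repeat: on the set path the `any_set` flag is published *before* the chunk bit, so a concurrent scanner can over-approximate but never miss a set bit; on the clear path the flag is only retired afterwards, and only when the epoch proves no set raced in between. For contrast, a sketch of why the reverse publish order would be wrong:

```c
// Hypothetical buggy ordering (NOT what the diff does):
//   mi_bitmap_chunk_try_set(&bitmap->chunks[ci], cidx);  // bit becomes visible...
//   /* ...a scanner reads any_set here: chunk not flagged -> the set bit is missed */
//   mi_bitmap_anyset_set(bitmap, ci);
// Flagging the chunk first makes any_set a safe, conservative over-approximation.
```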
@@ -541,11 +733,23 @@ bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
   mi_assert_internal(idx%8 == 0);
   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
-  bool ok = mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
-  if (ok) { mi_bitmap_update_anyset(set, bitmap, chunk_idx); }
-  return ok;
+  if (set) {
+    // first set the anyset since it is a conservative approximation (increases epoch)
+    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    // then actually try to set it atomically
+    return mi_bitmap_chunk_try_set8(&bitmap->chunks[chunk_idx], byte_idx);
+  }
+  else {
+    const size_t epoch = mi_bitmap_epoch(bitmap);
+    bool cleared = mi_bitmap_chunk_try_clear8(&bitmap->chunks[chunk_idx], byte_idx);
+    if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    }
+    return cleared;
+  }
 }

 // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
 // and false otherwise leaving the bitmask as is.
 // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
@@ -562,21 +766,31 @@ bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
   mi_assert_internal(chunk_idx < MI_BFIELD_BITS);
   if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia

-  bool ok = mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
-  if (ok) { mi_bitmap_update_anyset(set, bitmap, chunk_idx); }
-  return ok;
+  if (set) {
+    // first set the anyset since it is a conservative approximation (increases epoch)
+    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    // then actually try to set it atomically
+    return mi_bitmap_chunk_try_setN(&bitmap->chunks[chunk_idx], cidx, n);
+  }
+  else {
+    const size_t epoch = mi_bitmap_epoch(bitmap);
+    bool cleared = mi_bitmap_chunk_try_clearN(&bitmap->chunks[chunk_idx], cidx, n);
+    if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    }
+    return cleared;
+  }
 }

 // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
 // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
-bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
+bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_xset) {
   mi_assert_internal(n>0);
   mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
-  bool local_already_xset;
-  if (already_xset==NULL) { already_xset = &local_already_xset; }
-  // if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
-  // if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
   mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);

+  //if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
+  //if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
+
   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
@@ -584,11 +798,23 @@ bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bo
   mi_assert_internal(chunk_idx < MI_BFIELD_BITS);
   if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia

-  const bool allx = mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
-  mi_bitmap_update_anyset(set, bitmap, chunk_idx);
-  return allx;
+  if (set) {
+    // first set the anyset since it is a conservative approximation (increases epoch)
+    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    // then actually try to set it atomically
+    return mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, n, all_already_xset);
+  }
+  else {
+    const size_t epoch = mi_bitmap_epoch(bitmap);
+    bool cleared = mi_bitmap_chunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, all_already_xset);
+    if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    }
+    return cleared;
+  }
 }

 // Is a sequence of n bits already all set/cleared?
 bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
   mi_assert_internal(n>0);
@@ -605,16 +831,18 @@ bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n)
 }

-#define mi_bitmap_forall_set_chunks(bitmap,tseq,decl_chunk_idx) \
-  { size_t _set_idx; \
-    size_t _start = tseq % MI_BFIELD_BITS; \
-    mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
-    while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
-      decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;
+#define mi_bitmap_forall_set_chunks(bitmap,tseq,name_epoch,name_chunk_idx) \
+  { uint32_t _bit_idx; \
+    uint32_t _start = (uint32_t)(tseq % MI_EPOCHSET_BITS); \
+    size_t   name_epoch; \
+    uint32_t _any_set = mi_bitmap_anyset(bitmap,&name_epoch); \
+    _any_set = mi_rotr32(_any_set, _start); \
+    while (mi_bsf32(_any_set,&_bit_idx)) { \
+      size_t name_chunk_idx = (_bit_idx + _start) % MI_BFIELD_BITS;

 #define mi_bitmap_forall_set_chunks_end() \
-      _start += _set_idx+1;    /* so chunk_idx stays valid */ \
-      _any_set >>= _set_idx;   /* skip scanned bits (and avoid UB with (idx+1)) */ \
+      _start += _bit_idx+1;    /* so chunk_idx stays valid */ \
+      _any_set >>= _bit_idx;   /* skip scanned bits (and avoid UB with (_bit_idx+1)) */ \
       _any_set >>= 1; \
     } \
   }
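Usage of the reworked iterator follows the same begin/end macro pair as before, now also binding a caller-chosen epoch name. A sketch based on the call sites later in this diff:

```c
// Iterate the chunks whose any_set flag is on, starting at a tseq-derived offset.
mi_bitmap_forall_set_chunks(bitmap, tseq, epoch, chunk_idx)
{
  size_t cidx;
  if (mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx], &cidx)) {
    size_t global_idx = chunk_idx * MI_BITMAP_CHUNK_BITS + cidx;  // global bit index
    // ... use global_idx; `epoch` is available for mi_bitmap_anyset_try_clear ...
  }
}
mi_bitmap_forall_set_chunks_end();
```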
@@ -623,8 +851,8 @@ bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n)
 // and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
 // The low `MI_BFIELD_BITS` of start are used to set the start point of the search
 // (to reduce thread contention).
-bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) {
-  mi_bitmap_forall_set_chunks(bitmap,tseq,size_t chunk_idx)
+mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) {
+  mi_bitmap_forall_set_chunks(bitmap, tseq, epoch, chunk_idx)
   {
     size_t cidx;
     if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
@@ -635,8 +863,8 @@ bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx
     else {
       // we may find that all are unset only on a second iteration but that is ok as
       // _any_set is a conservative approximation.
-      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
+      if (epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+        mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
       }
     }
   }
@@ -647,8 +875,8 @@ bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx)

// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx ) {
  mi_bitmap_forall_set_chunks(bitmap,tseq,size_t chunk_idx)
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx ) {
  mi_bitmap_forall_set_chunks(bitmap,tseq, epoch, chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
@@ -658,8 +886,10 @@ bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx)
      return true;
    }
    else {
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      // we may find that all are unset only on a second iteration but that is ok as
      // _any_set is a conservative approximation.
      if (epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
      }
    }
  }
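
mi_bitmap_chunk_find_and_try_clear8 itself is not part of this hunk; one plausible way to locate an all-set byte in a single 64-bit bfield (a sketch in the spirit of the MI_BFIELD_LO_BIT8 constant from bitmap.h, not the commit's actual implementation) is to fold each byte's AND into its low bit:

#include <stdbool.h>
#include <stdint.h>

#define LO_BIT8 (~(uint64_t)0 / 0xFF)   // 0x0101010101010101, as in MI_BFIELD_LO_BIT8

static bool find_all_set_byte(uint64_t x, unsigned* byte_idx) {
  // fold the AND of all 8 bits of each byte into that byte's bit 0
  uint64_t b = x;
  b &= (b >> 4);
  b &= (b >> 2);
  b &= (b >> 1);
  b &= LO_BIT8;              // keep only the per-byte result bits
  if (b == 0) return false;  // no byte is 0xFF
  unsigned i = 0;            // result bits sit 8 positions apart, one per byte
  while ((b & 1) == 0) { b >>= 8; i++; }
  *byte_idx = i;             // index of the lowest all-set byte
  return true;
}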
@@ -673,10 +903,7 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
  // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
  // TODO: allow spanning across chunk boundaries
  if (n == 0 || n > MI_BFIELD_BITS) return false;
  if (n == 1) return mi_bitmap_try_find_and_clear(bitmap, tseq, pidx);
  if (n == 8) return mi_bitmap_try_find_and_clear8(bitmap, tseq, pidx);

  mi_bitmap_forall_set_chunks(bitmap,tseq,size_t chunk_idx)
  mi_bitmap_forall_set_chunks(bitmap,tseq,epoch,chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
@@ -685,8 +912,10 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
      return true;
    }
    else {
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      // we may find that all are unset only on a second iteration but that is ok as
      // _any_set is a conservative approximation.
      if (epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
      }
    }
  }
src/bitmap.h (65 changed lines)
@@ -25,20 +25,26 @@ typedef size_t mi_bfield_t;
#define MI_BFIELD_LO_BIT8     (((~(mi_bfield_t)0))/0xFF)  // 0x01010101 ..
#define MI_BFIELD_HI_BIT8     (MI_BFIELD_LO_BIT8 << 7)    // 0x80808080 ..

#define MI_BITMAP_CHUNK_SIZE          (MI_BITMAP_CHUNK_BITS / 8)
#define MI_BITMAP_CHUNK_FIELDS        (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)

typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
// 512 bits on 64-bit
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_chunk_s {
  _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;

// for now 32 (note: with ABA instructions we can make this 64)
#define MI_EPOCHSET_BITS      (32)
#define MI_BITMAP_CHUNK_COUNT MI_EPOCHSET_BITS
typedef uint64_t mi_epochset_t;

typedef mi_decl_align(32) struct mi_bitmap_s {
  mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
  _Atomic(mi_bfield_t) any_set;
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_s {
  mi_bitmap_chunk_t chunks[MI_BITMAP_CHUNK_COUNT];
  _Atomic(mi_epochset_t) any_set;
} mi_bitmap_t;

#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS)        // 16k bits on 64bit, 8k bits on 32bit
#define MI_BITMAP_MAX_BITS (MI_BITMAP_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
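
With these definitions on a 64-bit platform, one chunk is 512 bits = 64 bytes (matching the 512-bit chunks in the commit title) and the bitmap covers 32 x 512 = 16384 bits. A compile-time sanity check of that arithmetic; the concrete values here are assumptions spelled out for a 64-bit build:

#include <assert.h>

// assumed 64-bit configuration; MI_BITMAP_CHUNK_BITS itself is defined
// elsewhere in the sources, 512 is taken from the commit message
#define MI_BFIELD_BITS         64
#define MI_BITMAP_CHUNK_BITS   512
#define MI_BITMAP_CHUNK_SIZE   (MI_BITMAP_CHUNK_BITS / 8)
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_EPOCHSET_BITS       32
#define MI_BITMAP_CHUNK_COUNT  MI_EPOCHSET_BITS
#define MI_BITMAP_MAX_BITS     (MI_BITMAP_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS)

static_assert(MI_BITMAP_CHUNK_SIZE == 64,  "a 512-bit chunk is 64 bytes");
static_assert(MI_BITMAP_CHUNK_FIELDS == 8, "eight 64-bit bfields per chunk");
static_assert(MI_BITMAP_MAX_BITS == 16384, "32 chunks cover 16k bits");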

/* --------------------------------------------------------------------------------
  Bitmap
@@ -52,29 +58,73 @@ typedef bool mi_bit_t;
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);

// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n);

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_xset);

static inline bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, bool* all_already_set) {
  return mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, n, all_already_set);
}

static inline bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
  return mi_bitmap_xsetN(MI_BIT_CLEAR, bitmap, idx, n, NULL);
}


// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

static inline bool mi_bitmap_is_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
  return mi_bitmap_is_xsetN(MI_BIT_SET, bitmap, idx, n);
}

static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
  return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
}


// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

static inline bool mi_bitmap_try_set(mi_bitmap_t* bitmap, size_t idx) {
  return mi_bitmap_try_xset(MI_BIT_SET, bitmap, idx);
}

static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) {
  return mi_bitmap_try_xset(MI_BIT_CLEAR, bitmap, idx);
}


// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

static inline bool mi_bitmap_try_set8(mi_bitmap_t* bitmap, size_t idx) {
  return mi_bitmap_try_xset8(MI_BIT_SET, bitmap, idx);
}

static inline bool mi_bitmap_try_clear8(mi_bitmap_t* bitmap, size_t idx) {
  return mi_bitmap_try_xset8(MI_BIT_CLEAR, bitmap, idx);
}

// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

static inline bool mi_bitmap_try_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
  return mi_bitmap_try_xsetN(MI_BIT_SET, bitmap, idx, n);
}

static inline bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
  return mi_bitmap_try_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
}
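
A hypothetical usage sketch of this try-API (the index and length are made up for illustration; `bitmap` is assumed to be initialized with mi_bitmap_init and bitmap.h included):

static bool claim_and_release(mi_bitmap_t* bitmap) {
  const size_t idx = 100;  // arbitrary example index (range stays within one chunk)
  const size_t n   = 4;
  if (!mi_bitmap_try_setN(bitmap, idx, n)) {
    return false;          // some bit in the range was already set
  }
  // ... the range [idx, idx+n) is now exclusively ours ...
  mi_bitmap_clearN(bitmap, idx, n);
  return true;
}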

// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
@@ -89,4 +139,5 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx)
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx );

#endif // MI_XBITMAP_H
src/free.c

@@ -239,9 +239,9 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
  }
  else {
    if (mi_page_is_abandoned(page)) {
      mi_assert(false);
      // mi_assert(false);
    }
    mi_assert_internal(!mi_page_is_singleton(page));  // we should have succeeded on singleton pages
    // mi_assert_internal(!mi_page_is_singleton(page));  // we should have succeeded on singleton pages
  }
}
src/libc.c (10 changed lines)
@@ -280,7 +280,7 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
// generic trailing and leading zero count
// --------------------------------------------------------

static inline size_t mi_ctz_generic32(uint32_t x) {
uint32_t _mi_ctz_generic32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  static const uint8_t debruijn[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
@@ -290,7 +290,7 @@ static inline size_t mi_ctz_generic32(uint32_t x)
  return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
}
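
This can be checked in isolation. In the sketch below the second half of the lookup table, which is not visible in this diff, is filled in with the standard completion for the de Bruijn constant 0x077CB531 (an assumption worth verifying against the source), and the lowest set bit is isolated with the unsigned form 0u - x to sidestep the signed-negation edge case at 0x80000000:

#include <stdint.h>
#include <stdio.h>

static uint32_t ctz32_debruijn(uint32_t x) {
  static const uint8_t debruijn[32] = {
     0,  1, 28,  2, 29, 14, 24,  3, 30, 22, 20, 15, 25, 17,  4,  8,
    31, 27, 13, 23, 21, 19, 16,  7, 26, 12, 18,  6, 11,  5, 10,  9
  };
  // x & (0u - x) isolates the lowest set bit; multiplying by the de Bruijn
  // constant places a unique 5-bit pattern in the top bits, which indexes the table
  return debruijn[((x & (0u - x)) * 0x077CB531u) >> 27];
}

int main(void) {
  for (uint32_t i = 0; i < 32; i++) {
    if (ctz32_debruijn((uint32_t)1 << i) != i) { printf("mismatch at %u\n", i); return 1; }
  }
  printf("de Bruijn ctz matches the bit index for all 32 single-bit inputs\n");
  return 0;
}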

static inline size_t mi_clz_generic32(uint32_t x) {
static size_t mi_clz_generic32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  static const uint8_t debruijn[32] = {
    31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
@@ -319,10 +319,10 @@ size_t _mi_clz_generic(size_t x)
size_t _mi_ctz_generic(size_t x) {
  if (x==0) return MI_SIZE_BITS;
  #if (MI_SIZE_BITS <= 32)
  return mi_ctz_generic32((uint32_t)x);
  return _mi_ctz_generic32((uint32_t)x);
  #else
  const size_t count = mi_ctz_generic32((uint32_t)x);
  const size_t count = _mi_ctz_generic32((uint32_t)x);
  if (count < 32) return count;
  return (32 + mi_ctz_generic32((uint32_t)(x>>32)));
  return (32 + _mi_ctz_generic32((uint32_t)(x>>32)));
  #endif
}
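
The 64-bit count is thus composed from two 32-bit counts: if the low word has a set bit its count is returned directly, otherwise 32 is added to the count of the high word (and zero falls through to 32 + 32 = 64 = MI_SIZE_BITS). A quick standalone harness (hypothetical, not part of the commit) that checks this composition against the bit index:

#include <stdint.h>
#include <stdio.h>

static unsigned ctz32_naive(uint32_t x) {   // stand-in for a 32-bit ctz helper
  if (x == 0) return 32;
  unsigned n = 0;
  while ((x & 1) == 0) { x >>= 1; n++; }
  return n;
}

static unsigned ctz64_split(uint64_t x) {
  unsigned count = ctz32_naive((uint32_t)x);
  if (count < 32) return count;
  return 32 + ctz32_naive((uint32_t)(x >> 32));  // low half was all zero
}

int main(void) {
  for (unsigned i = 0; i < 64; i++) {
    if (ctz64_split((uint64_t)1 << i) != i) { printf("mismatch at bit %u\n", i); return 1; }
  }
  printf("split 32-bit ctz matches the bit index for all 64 single-bit inputs\n");
  return 0;
}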
@@ -412,7 +412,7 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message)
// Define our own limited `fprintf` that avoids memory allocation.
// We do this using `_mi_vsnprintf` with a limited buffer.
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
  char buf[512];
  char buf[768];
  if (fmt==NULL) return;
  if (!mi_recurse_enter()) return;
  _mi_vsnprintf(buf, sizeof(buf)-1, fmt, args);