mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-07-06 19:38:41 +03:00
wip: initial work on mimalloc3 without segments
This commit is contained in:
parent
9b7537755a
commit
71cfa45e76
15 changed files with 3001 additions and 289 deletions
13
src/bitmap.c
13
src/bitmap.c
|
@ -18,6 +18,7 @@ between the fields. (This is used in arena allocation)
|
|||
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/bits.h"
|
||||
#include "bitmap.h"
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
|
@ -53,7 +54,7 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_
|
|||
const size_t mask = mi_bitmap_mask_(count, 0);
|
||||
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
|
||||
|
||||
#ifdef MI_HAVE_FAST_BITSCAN
|
||||
#if MI_HAS_FAST_BITSCAN
|
||||
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
|
||||
#else
|
||||
size_t bitidx = 0; // otherwise start at 0
|
||||
|
@ -79,7 +80,7 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_
|
|||
}
|
||||
else {
|
||||
// on to the next bit range
|
||||
#ifdef MI_HAVE_FAST_BITSCAN
|
||||
#if MI_HAS_FAST_BITSCAN
|
||||
mi_assert_internal(mapm != 0);
|
||||
const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
|
||||
mi_assert_internal(shift > 0 && shift <= count);
|
||||
|
@ -146,7 +147,7 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size
|
|||
return ((field & mask) == mask);
|
||||
}
|
||||
|
||||
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
|
||||
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
|
||||
// Returns `true` if successful when all previous `count` bits were 0.
|
||||
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
const size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
|
@ -154,9 +155,9 @@ bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count
|
|||
const size_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
|
||||
do {
|
||||
do {
|
||||
if ((expected & mask) != 0) return false;
|
||||
}
|
||||
}
|
||||
while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
|
||||
mi_assert_internal((expected & mask) == 0);
|
||||
return true;
|
||||
|
@ -194,7 +195,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
|
|||
if (initial == 0) return false;
|
||||
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
|
||||
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
|
||||
|
||||
|
||||
// scan ahead
|
||||
size_t found = initial;
|
||||
size_t mask = 0; // mask bits for the final field
|
||||
|
|
22
src/init.c
22
src/init.c
|
@ -124,6 +124,18 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
|
|||
return _mi_prim_thread_id();
|
||||
}
|
||||
|
||||
// Thread sequence number
|
||||
static _Atomic(size_t) mi_tcount;
|
||||
static mi_decl_thread size_t mi_tseq;
|
||||
|
||||
size_t _mi_thread_seq_id(void) mi_attr_noexcept {
|
||||
size_t tseq = mi_tseq;
|
||||
if (tseq == 0) {
|
||||
mi_tseq = tseq = mi_atomic_add_acq_rel(&mi_tcount,1);
|
||||
}
|
||||
return tseq;
|
||||
}
|
||||
|
||||
// the thread-local default heap for allocation
|
||||
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
|
||||
|
||||
|
@ -169,8 +181,8 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL };
|
|||
#if MI_GUARDED
|
||||
mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
|
||||
heap->guarded_sample_seed = seed;
|
||||
if (heap->guarded_sample_seed == 0) {
|
||||
heap->guarded_sample_seed = _mi_heap_random_next(heap);
|
||||
if (heap->guarded_sample_seed == 0) {
|
||||
heap->guarded_sample_seed = _mi_heap_random_next(heap);
|
||||
}
|
||||
heap->guarded_sample_rate = sample_rate;
|
||||
if (heap->guarded_sample_rate >= 1) {
|
||||
|
@ -188,9 +200,9 @@ void _mi_heap_guarded_init(mi_heap_t* heap) {
|
|||
mi_heap_guarded_set_sample_rate(heap,
|
||||
(size_t)mi_option_get_clamp(mi_option_guarded_sample_rate, 0, LONG_MAX),
|
||||
(size_t)mi_option_get(mi_option_guarded_sample_seed));
|
||||
mi_heap_guarded_set_size_bound(heap,
|
||||
mi_heap_guarded_set_size_bound(heap,
|
||||
(size_t)mi_option_get_clamp(mi_option_guarded_min, 0, LONG_MAX),
|
||||
(size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) );
|
||||
(size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) );
|
||||
}
|
||||
#else
|
||||
mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
|
||||
|
@ -602,7 +614,7 @@ static void mi_detect_cpu_features(void) {
|
|||
}
|
||||
#else
|
||||
static void mi_detect_cpu_features(void) {
|
||||
// nothing
|
||||
// nothing
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
67
src/libc.c
67
src/libc.c
|
@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
|
||||
// --------------------------------------------------------
|
||||
// This module defines various std libc functions to reduce
|
||||
// the dependency on libc, and also prevent errors caused
|
||||
// the dependency on libc, and also prevent errors caused
|
||||
// by some libc implementations when called before `main`
|
||||
// executes (due to malloc redirection)
|
||||
// --------------------------------------------------------
|
||||
|
@ -83,7 +83,7 @@ bool _mi_getenv(const char* name, char* result, size_t result_size) {
|
|||
// Define our own limited `_mi_vsnprintf` and `_mi_snprintf`
|
||||
// This is mostly to avoid calling these when libc is not yet
|
||||
// initialized (and to reduce dependencies)
|
||||
//
|
||||
//
|
||||
// format: d i, p x u, s
|
||||
// prec: z l ll L
|
||||
// width: 10
|
||||
|
@ -130,7 +130,7 @@ static void mi_out_alignright(char fill, char* start, size_t len, size_t extra,
|
|||
}
|
||||
|
||||
|
||||
static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char* end)
|
||||
static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char* end)
|
||||
{
|
||||
if (x == 0 || base == 0 || base > 16) {
|
||||
if (prefix != 0) { mi_outc(prefix, out, end); }
|
||||
|
@ -144,8 +144,8 @@ static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char*
|
|||
mi_outc((digit <= 9 ? '0' + digit : 'A' + digit - 10),out,end);
|
||||
x = x / base;
|
||||
}
|
||||
if (prefix != 0) {
|
||||
mi_outc(prefix, out, end);
|
||||
if (prefix != 0) {
|
||||
mi_outc(prefix, out, end);
|
||||
}
|
||||
size_t len = *out - start;
|
||||
// and reverse in-place
|
||||
|
@ -181,7 +181,7 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
|
|||
size_t width = 0;
|
||||
char numtype = 'd';
|
||||
char numplus = 0;
|
||||
bool alignright = true;
|
||||
bool alignright = true;
|
||||
if (c == '+' || c == ' ') { numplus = c; MI_NEXTC(); }
|
||||
if (c == '-') { alignright = false; MI_NEXTC(); }
|
||||
if (c == '0') { fill = '0'; MI_NEXTC(); }
|
||||
|
@ -191,7 +191,7 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
|
|||
width = (10 * width) + (c - '0'); MI_NEXTC();
|
||||
}
|
||||
if (c == 0) break; // extra check due to while
|
||||
}
|
||||
}
|
||||
if (c == 'z' || c == 't' || c == 'L') { numtype = c; MI_NEXTC(); }
|
||||
else if (c == 'l') {
|
||||
numtype = c; MI_NEXTC();
|
||||
|
@ -273,3 +273,56 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
|
|||
_mi_vsnprintf(buf, buflen, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
// generic trailing and leading zero count
|
||||
// --------------------------------------------------------
|
||||
|
||||
static inline size_t mi_ctz_generic32(uint32_t x) {
|
||||
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
|
||||
static const uint8_t debruijn[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
|
||||
};
|
||||
if (x==0) return 32;
|
||||
return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
|
||||
}
|
||||
|
||||
static inline size_t mi_clz_generic32(uint32_t x) {
|
||||
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
|
||||
static const uint8_t debruijn[32] = {
|
||||
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
|
||||
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
|
||||
};
|
||||
if (x==0) return 32;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
|
||||
}
|
||||
|
||||
size_t _mi_clz_generic(size_t x) {
|
||||
if (x==0) return MI_SIZE_BITS;
|
||||
#if (MI_SIZE_BITS <= 32)
|
||||
return mi_clz_generic32((uint32_t)x);
|
||||
#else
|
||||
const size_t count = mi_clz_generic32((uint32_t)(x >> 32));
|
||||
if (count < 32) return count;
|
||||
return (32 + mi_clz_generic32((uint32_t)x));
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t _mi_ctz_generic(size_t x) {
|
||||
if (x==0) return MI_SIZE_BITS;
|
||||
#if (MI_SIZE_BITS <= 32)
|
||||
return mi_ctz_generic32((uint32_t)x);
|
||||
#else
|
||||
const size_t count = mi_ctz_generic32((uint32_t)x);
|
||||
if (count < 32) return count;
|
||||
return (32 + mi_ctz_generic32((uint32_t)(x>>32)));
|
||||
#endif
|
||||
}
|
||||
|
|
12
src/os.c
12
src/os.c
|
@ -359,6 +359,18 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
|
|||
return p;
|
||||
}
|
||||
|
||||
void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
|
||||
void* p = _mi_os_alloc(size, memid, &_mi_stats_main);
|
||||
if (p == NULL) return NULL;
|
||||
|
||||
// zero the OS memory if needed
|
||||
if (!memid->initially_zero) {
|
||||
_mi_memzero_aligned(p, size);
|
||||
memid->initially_zero = true;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
OS aligned allocation with an offset. This is used
|
||||
for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
|
||||
|
|
|
@ -83,9 +83,10 @@ static inline uint8_t mi_bin(size_t size) {
|
|||
#if defined(MI_ALIGN4W)
|
||||
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
|
||||
#endif
|
||||
wsize--;
|
||||
// find the highest bit
|
||||
uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0
|
||||
wsize--;
|
||||
mi_assert_internal(wsize!=0);
|
||||
// find the highest bit position
|
||||
uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
|
||||
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
|
||||
// - adjust with 3 because we use do not round the first 8 sizes
|
||||
// which each get an exact bin
|
||||
|
|
1777
src/xarena.c
Normal file
1777
src/xarena.c
Normal file
File diff suppressed because it is too large
Load diff
599
src/xbitmap.c
Normal file
599
src/xbitmap.c
Normal file
|
@ -0,0 +1,599 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Concurrent bitmap that can set/reset sequences of bits atomically
|
||||
---------------------------------------------------------------------------- */
|
||||
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/bits.h"
|
||||
#include "xbitmap.h"
|
||||
|
||||
/* --------------------------------------------------------------------------------
|
||||
bfields
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
static inline size_t mi_bfield_ctz(mi_bfield_t x) {
|
||||
return mi_ctz(x);
|
||||
}
|
||||
|
||||
static inline size_t mi_bfield_clz(mi_bfield_t x) {
|
||||
return mi_clz(x);
|
||||
}
|
||||
|
||||
// find the least significant bit that is set (i.e. count trailing zero's)
|
||||
// return false if `x==0` (with `*idx` undefined) and true otherwise,
|
||||
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
|
||||
static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
|
||||
return mi_bsf(x,idx);
|
||||
}
|
||||
|
||||
static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
|
||||
return mi_rotr(x,r);
|
||||
}
|
||||
|
||||
// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
|
||||
static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
|
||||
mi_assert_internal(idx < MI_BFIELD_BITS);
|
||||
const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
|
||||
if (set) {
|
||||
const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
|
||||
return ((old&mask) == 0);
|
||||
}
|
||||
else {
|
||||
mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
|
||||
return ((old&mask) == mask);
|
||||
}
|
||||
}
|
||||
|
||||
// Set/clear a mask set of bits atomically, and return true of the mask bits transitioned from all 0's to 1's (or all 1's to 0's)
|
||||
// `already_xset` is true if all bits for the mask were already set/cleared.
|
||||
static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
|
||||
mi_assert_internal(mask != 0);
|
||||
if (set) {
|
||||
mi_bfield_t old = *b;
|
||||
while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits until success
|
||||
*already_xset = ((old&mask) == mask);
|
||||
return ((old&mask) == 0);
|
||||
}
|
||||
else { // clear
|
||||
mi_bfield_t old = *b;
|
||||
while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success
|
||||
*already_xset = ((old&mask) == 0);
|
||||
return ((old&mask) == mask);
|
||||
}
|
||||
}
|
||||
|
||||
// Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
|
||||
static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
|
||||
mi_assert_internal(idx < MI_BFIELD_BITS);
|
||||
// for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first
|
||||
return mi_bfield_atomic_xset(set, b, idx);
|
||||
}
|
||||
|
||||
|
||||
// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
|
||||
// and false otherwise (leaving the bit field as is).
|
||||
static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
|
||||
mi_assert_internal(mask != 0);
|
||||
if (set) {
|
||||
mi_bfield_t old = *b;
|
||||
do {
|
||||
if ((old&mask) != 0) return false; // the mask bits are no longer 0
|
||||
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits
|
||||
return true;
|
||||
}
|
||||
else { // clear
|
||||
mi_bfield_t old = *b;
|
||||
do {
|
||||
if ((old&mask) != mask) return false; // the mask bits are no longer set
|
||||
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if all bits corresponding to a mask are set/cleared.
|
||||
static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
|
||||
mi_assert_internal(mask != 0);
|
||||
if (set) {
|
||||
return ((*b & mask) == mask);
|
||||
}
|
||||
else {
|
||||
return ((*b & mask) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
|
||||
// and false otherwise (leaving the bit field as is).
|
||||
static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)* b, size_t byte_idx ) {
|
||||
mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
|
||||
const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
|
||||
return mi_bfield_atomic_try_xset_mask(set,b,mask);
|
||||
}
|
||||
|
||||
|
||||
/* --------------------------------------------------------------------------------
|
||||
bitmap chunks
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
|
||||
mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
|
||||
const size_t i = cidx / MI_BFIELD_BITS;
|
||||
const size_t idx = cidx % MI_BFIELD_BITS;
|
||||
return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
|
||||
}
|
||||
|
||||
static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
|
||||
mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
|
||||
const size_t i = byte_idx / MI_BFIELD_SIZE;
|
||||
const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
|
||||
return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
|
||||
}
|
||||
|
||||
// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
|
||||
static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
|
||||
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(n>0);
|
||||
bool all_transition = true;
|
||||
bool all_already_xset = true;
|
||||
size_t idx = cidx % MI_BFIELD_BITS;
|
||||
size_t field = cidx / MI_BFIELD_BITS;
|
||||
while (n > 0) {
|
||||
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
|
||||
if (m > n) { m = n; }
|
||||
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
|
||||
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
|
||||
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
|
||||
bool already_xset;
|
||||
all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
|
||||
all_already_xset = all_already_xset && already_xset;
|
||||
// next field
|
||||
field++;
|
||||
idx = 0;
|
||||
n -= m;
|
||||
}
|
||||
*palready_xset = all_already_xset;
|
||||
return all_transition;
|
||||
}
|
||||
|
||||
// Check if a sequence of `n` bits within a chunk are all set/cleared.
|
||||
static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
|
||||
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(n>0);
|
||||
bool all_xset = true;
|
||||
size_t idx = cidx % MI_BFIELD_BITS;
|
||||
size_t field = cidx / MI_BFIELD_BITS;
|
||||
while (n > 0) {
|
||||
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
|
||||
if (m > n) { m = n; }
|
||||
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
|
||||
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
|
||||
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
|
||||
all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
|
||||
// next field
|
||||
field++;
|
||||
idx = 0;
|
||||
n -= m;
|
||||
}
|
||||
return all_xset;
|
||||
}
|
||||
|
||||
// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
|
||||
// and false otherwise leaving all bit fields as is.
|
||||
static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
|
||||
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(n>0);
|
||||
if (n==0) return true;
|
||||
size_t start_idx = cidx % MI_BFIELD_BITS;
|
||||
size_t start_field = cidx / MI_BFIELD_BITS;
|
||||
size_t end_field = MI_BITMAP_CHUNK_FIELDS;
|
||||
size_t mask_mid = 0;
|
||||
size_t mask_end = 0;
|
||||
|
||||
// first field
|
||||
size_t field = start_field;
|
||||
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
|
||||
if (m > n) { m = n; }
|
||||
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
|
||||
mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
|
||||
const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
|
||||
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;
|
||||
|
||||
// done?
|
||||
n -= m;
|
||||
if (n==0) return true;
|
||||
|
||||
// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
|
||||
|
||||
// mid fields
|
||||
while (n >= MI_BFIELD_BITS) {
|
||||
field++;
|
||||
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
|
||||
mask_mid = ~MI_ZU(0);
|
||||
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
|
||||
n -= MI_BFIELD_BITS;
|
||||
}
|
||||
|
||||
// last field
|
||||
if (n > 0) {
|
||||
mi_assert_internal(n < MI_BFIELD_BITS);
|
||||
field++;
|
||||
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
|
||||
end_field = field;
|
||||
mask_end = (MI_ZU(1)<<n)-1;
|
||||
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
restore:
|
||||
// field is on the field that failed to set atomically; we need to restore all previous fields
|
||||
mi_assert_internal(field > start_field);
|
||||
while( field > start_field) {
|
||||
field--;
|
||||
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
|
||||
bool already_xset;
|
||||
mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// find least 1-bit in a chunk and try unset it atomically
|
||||
// set `*pidx` to thi bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
|
||||
// todo: try neon version
|
||||
static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
|
||||
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
||||
while(true) {
|
||||
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||
if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
|
||||
const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
|
||||
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
|
||||
mi_assert_internal(mask != 0);
|
||||
const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
|
||||
mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
|
||||
size_t cidx;
|
||||
if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
|
||||
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
|
||||
*pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
|
||||
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// try again
|
||||
}
|
||||
#else
|
||||
size_t idx;
|
||||
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
|
||||
size_t idx;
|
||||
if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
|
||||
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
|
||||
*pidx = (i*MI_BFIELD_BITS + idx);
|
||||
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// find least byte in a chunk with all bits set, and try unset it atomically
|
||||
// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
|
||||
// todo: try neon version
|
||||
static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
|
||||
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
||||
while(true) {
|
||||
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||
const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
|
||||
const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
|
||||
if (mask == 0) return false;
|
||||
const size_t i = _tzcnt_u32(mask);
|
||||
mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
|
||||
const size_t chunk_idx = i / MI_BFIELD_SIZE;
|
||||
const size_t byte_idx = i % MI_BFIELD_SIZE;
|
||||
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically
|
||||
*pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
|
||||
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
|
||||
return true;
|
||||
}
|
||||
// try again
|
||||
}
|
||||
#else
|
||||
size_t idx;
|
||||
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
|
||||
const mi_bfield_t x = chunk->bfields[i];
|
||||
// has_set8 has low bit in each byte set if the byte in x == 0xFF
|
||||
const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F
|
||||
(x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80
|
||||
>> 7; // shift high bit to low bit
|
||||
size_t idx;
|
||||
if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
|
||||
mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
|
||||
mi_assert_internal((idx%8)==0);
|
||||
const size_t byte_idx = idx/8;
|
||||
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically
|
||||
*pidx = (i*MI_BFIELD_BITS) + idx;
|
||||
mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
|
||||
return true;
|
||||
}
|
||||
// else continue
|
||||
}
|
||||
}
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically
|
||||
// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
|
||||
// todo: try avx2 and neon version
|
||||
// todo: allow spanning across bfield boundaries?
|
||||
static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
|
||||
if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger?
|
||||
const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
|
||||
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
|
||||
mi_bfield_t b = chunk->bfields[i];
|
||||
size_t bshift = 0;
|
||||
size_t idx;
|
||||
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
|
||||
b >>= idx;
|
||||
bshift += idx;
|
||||
if (bshift + n >= MI_BFIELD_BITS) break;
|
||||
|
||||
if ((b&mask) == mask) { // found a match
|
||||
mi_assert_internal( ((mask << bshift) >> bshift) == mask );
|
||||
if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
|
||||
*pidx = (i*MI_BFIELD_BITS) + bshift;
|
||||
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
// if failed to atomically commit, try again from this position
|
||||
b = (chunk->bfields[i] >> bshift);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// advance
|
||||
const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask)
|
||||
mi_assert_internal(ones>0);
|
||||
bshift += ones;
|
||||
b >>= ones;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// are all bits in a bitmap chunk set?
|
||||
static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
|
||||
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
||||
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||
return _mm256_test_all_ones(vec);
|
||||
#else
|
||||
// written like this for vectorization
|
||||
mi_bfield_t x = chunk->bfields[0];
|
||||
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
|
||||
x = x & chunk->bfields[i];
|
||||
}
|
||||
return (~x == 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
// are all bits in a bitmap chunk clear?
|
||||
static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
|
||||
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
||||
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||
return _mm256_testz_si256( vec, vec );
|
||||
#else
|
||||
// written like this for vectorization
|
||||
mi_bfield_t x = chunk->bfields[0];
|
||||
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
|
||||
x = x | chunk->bfields[i];
|
||||
}
|
||||
return (x == 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------
|
||||
bitmap
|
||||
-------------------------------------------------------------------------------- */
|
||||
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
|
||||
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
|
||||
if (!already_zero) {
|
||||
_mi_memzero_aligned(bitmap, sizeof(*bitmap));
|
||||
}
|
||||
}
|
||||
|
||||
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
|
||||
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
|
||||
mi_assert_internal(n>0);
|
||||
mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);
|
||||
|
||||
// first chunk
|
||||
size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
|
||||
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
|
||||
size_t m = MI_BITMAP_CHUNK_BITS - cidx;
|
||||
if (m > n) { m = n; }
|
||||
bool already_xset;
|
||||
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);
|
||||
|
||||
// n can be large so use memset for efficiency for all in-between chunks
|
||||
chunk_idx++;
|
||||
n -= m;
|
||||
const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
|
||||
if (mid_chunks > 0) {
|
||||
_mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), MI_BITMAP_CHUNK_BITS/8);
|
||||
chunk_idx += mid_chunks;
|
||||
n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
|
||||
}
|
||||
|
||||
// last chunk
|
||||
if (n > 0) {
|
||||
mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
|
||||
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
|
||||
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
|
||||
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
|
||||
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
|
||||
return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
|
||||
}
|
||||
|
||||
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
|
||||
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
|
||||
mi_assert_internal(idx%8 == 0);
|
||||
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
|
||||
const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
|
||||
return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
|
||||
}
|
||||
|
||||
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
|
||||
bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
|
||||
mi_assert_internal(n>0);
|
||||
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
|
||||
if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
|
||||
if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }
|
||||
|
||||
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
|
||||
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
|
||||
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
|
||||
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
|
||||
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
|
||||
return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
|
||||
}
|
||||
|
||||
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
|
||||
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
|
||||
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
|
||||
mi_assert_internal(n>0);
|
||||
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
|
||||
bool local_already_xset;
|
||||
if (already_xset==NULL) { already_xset = &local_already_xset; }
|
||||
// if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
|
||||
// if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
|
||||
|
||||
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
|
||||
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
|
||||
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
|
||||
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
|
||||
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
|
||||
return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
|
||||
}
|
||||
|
||||
// Is a sequence of n bits already all set/cleared?
|
||||
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
|
||||
mi_assert_internal(n>0);
|
||||
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
|
||||
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
|
||||
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
|
||||
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
|
||||
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
|
||||
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
|
||||
return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
|
||||
}
|
||||
|
||||
|
||||
// Open-ended iteration macro pair: visit every chunk whose bit in `any_set`
// is set, starting at chunk `start % MI_BFIELD_BITS` (rotating the search
// origin spreads concurrent callers over different chunks).
// `decl_chunk_idx` is a declaration such as `size_t chunk_idx` that receives
// the current chunk index; the caller's body follows, closed by
// `mi_bitmap_forall_set_chunks_end()`.
// NOTE(review): `any_set` is read once (non-atomically snapshotted via the
// rotate) — presumably a conservative approximation is acceptable here; the
// callers' comments say as much. Confirm this is the intended memory order.
#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
  { size_t _set_idx; \
    size_t _start = start % MI_BFIELD_BITS; \
    mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
    while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
      decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;

// Closes the loop opened by `mi_bitmap_forall_set_chunks`: advance past the
// bit just examined (two shifts so `_set_idx+1 == MI_BFIELD_BITS` is not UB).
#define mi_bitmap_forall_set_chunks_end() \
      _start += _set_idx+1;    /* so chunk_idx stays valid */ \
      _any_set >>= _set_idx;   /* skip scanned bits (and avoid UB with (idx+1)) */ \
      _any_set >>= 1; \
    } \
  }
|
||||
|
||||
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
// (to reduce thread contention).
bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
      // found and cleared a bit in this chunk; translate to a global index
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
      return true;
    }
    else {
      // we may find that all are unset only on a second iteration but that is ok as
      // _any_set is a conservative approximation.
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        // lazily clear the stale `any_set` bit so later scans skip this chunk
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}
|
||||
|
||||
|
||||
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`
// (always byte aligned).
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
      // found and cleared a full byte; translate to a global bit index
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
      mi_assert_internal((*pidx % 8) == 0);
      return true;
    }
    else {
      // chunk turned out empty: lazily clear its stale `any_set` bit
      // (any_set is a conservative approximation, see try_find_and_clear)
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}
|
||||
|
||||
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
// Currently limited to `1 <= n <= MI_BFIELD_BITS` and sequences that do not
// cross a chunk boundary (returns false otherwise).
bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
  // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
  // TODO: allow spanning across chunk boundaries
  if (n == 0 || n > MI_BFIELD_BITS) return false;
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
      // found and cleared `n` consecutive bits; translate to a global index
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
      return true;
    }
    else {
      // chunk turned out empty: lazily clear its stale `any_set` bit
      // (any_set is a conservative approximation, see try_find_and_clear)
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}
|
94
src/xbitmap.h
Normal file
94
src/xbitmap.h
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Concurrent bitmap that can set/reset sequences of bits atomically
|
||||
---------------------------------------------------------------------------- */
|
||||
#pragma once
|
||||
#ifndef MI_XBITMAP_H
|
||||
#define MI_XBITMAP_H
|
||||
|
||||
/* --------------------------------------------------------------------------------
  Definitions
-------------------------------------------------------------------------------- */

// A "bfield" is one machine word of bits (32 or 64).
typedef size_t mi_bfield_t;

#define MI_BFIELD_BITS_SHIFT      (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS            (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE            (MI_BFIELD_BITS/8)
#define MI_BFIELD_BITS_MOD_MASK   (MI_BFIELD_BITS - 1)
// fix: use a C cast; `mi_bfield_t(0)` is C++ function-style syntax and fails to
// compile in C when the macro is expanded.
#define MI_BFIELD_LO_BIT8         ((~((mi_bfield_t)0))/0xFF)  // 0x01010101 ..
#define MI_BFIELD_HI_BIT8         (MI_BFIELD_LO_BIT8 << 7)    // 0x80808080 ..

#define MI_BITMAP_CHUNK_BITS_SHIFT     (8)  // 2^8 = 256 bits per chunk
#define MI_BITMAP_CHUNK_BITS           (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
#define MI_BITMAP_CHUNK_FIELDS         (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK  (MI_BITMAP_CHUNK_BITS - 1)
||||
// One chunk of MI_BITMAP_CHUNK_BITS (256) bits, stored as an array of atomic
// bfields and 32-byte aligned (presumably for vector-width access — TODO confirm).
typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
  _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;


// An atomic bitmap: MI_BFIELD_BITS chunks plus one summary bfield `any_set`
// where bit `i` conservatively indicates that chunk `i` may contain set bits
// (it can be stale; scanners clear it lazily when a chunk is found empty).
typedef mi_decl_align(32) struct mi_bitmap_s {
  mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
  _Atomic(mi_bfield_t)any_set;
} mi_bitmap_t;

#define MI_BITMAP_MAX_BITS     (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS)  // 16k bits on 64bit, 8k bits on 32bit

/* --------------------------------------------------------------------------------
  Bitmap
-------------------------------------------------------------------------------- */

// A bit is either set (`MI_BIT_SET`) or cleared (`MI_BIT_CLEAR`); the "xset"
// functions take this to select which transition they perform.
typedef bool mi_bit_t;
#define MI_BIT_SET    (true)
#define MI_BIT_CLEAR  (false)
|
||||
|
||||
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);

// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);

// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is. `idx` must be byte aligned (idx%8 == 0).
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
// (to reduce thread contention).
// NOTE(review): this takes (pidx, start) while the two functions below take
// (start, pidx) — consider unifying the parameter order before the API settles.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);

// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx );

// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx );

#endif // MI_XBITMAP_H
|
Loading…
Add table
Add a link
Reference in a new issue