working simplified version without pairmaps and bitmap epoch

daanx 2024-12-06 15:26:01 -08:00
parent ec9c61c066
commit 61436a92b9
5 changed files with 44 additions and 38 deletions

View file

@@ -42,9 +42,9 @@ static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
   return mi_rotr(x,r);
 }
 
-static inline mi_bfield_t mi_bfield_zero(void) {
-  return 0;
-}
+//static inline mi_bfield_t mi_bfield_zero(void) {
+//  return 0;
+//}
 
 static inline mi_bfield_t mi_bfield_one(void) {
   return 1;
@@ -147,10 +147,10 @@ static inline bool mi_bfield_atomic_xset_mask(mi_xset_t set, _Atomic(mi_bfield_t
 
 // Tries to set a bit atomically. Returns `true` if the bit transitioned from 0 to 1
 // and otherwise false (leaving the bit unchanged)
-static inline bool mi_bfield_atomic_try_set(_Atomic(mi_bfield_t)*b, size_t idx) {
-  mi_assert_internal(idx < MI_BFIELD_BITS);
-  return mi_bfield_atomic_set(b, idx);  // for a single bit there is no difference
-}
+//static inline bool mi_bfield_atomic_try_set(_Atomic(mi_bfield_t)*b, size_t idx) {
+//  mi_assert_internal(idx < MI_BFIELD_BITS);
+//  return mi_bfield_atomic_set(b, idx);  // for a single bit there is no difference
+//}
 
 // Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0.
 // `all_clear` is set to true if the new bfield is zero (and false otherwise)
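
Note: commenting out `mi_bfield_atomic_try_set` is safe because, for a single bit, a plain atomic set already reports whether the 0 -> 1 transition happened, so "try" and plain set coincide. A minimal sketch of that reasoning with hypothetical names (not the mimalloc API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// A fetch-OR returns the old value, which tells the caller whether the bit
// transitioned 0 -> 1; a separate CAS-based try_set buys nothing here.
static inline bool bfield_atomic_set(_Atomic(uint64_t)* b, size_t idx) {
  const uint64_t mask = (uint64_t)1 << idx;
  const uint64_t old  = atomic_fetch_or_explicit(b, mask, memory_order_acq_rel);
  return ((old & mask) == 0);  // true iff this call set the bit
}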
@@ -237,17 +237,17 @@ static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t by
 
 // Try to set a full field of bits atomically, and return true all bits transitioned from all 0's to 1's.
 // and false otherwise leaving the bit field as-is.
-static inline bool mi_bfield_atomic_try_setX(_Atomic(mi_bfield_t)*b) {
-  mi_bfield_t old = 0;
-  return mi_atomic_cas_weak_acq_rel(b, &old, mi_bfield_all_set());
-}
+//static inline bool mi_bfield_atomic_try_setX(_Atomic(mi_bfield_t)*b) {
+//  mi_bfield_t old = 0;
+//  return mi_atomic_cas_weak_acq_rel(b, &old, mi_bfield_all_set());
+//}
 
 // Try to clear a full field of bits atomically, and return true all bits transitioned from all 1's to 0's.
 // and false otherwise leaving the bit field as-is.
-static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b) {
-  mi_bfield_t old = mi_bfield_all_set();
-  return mi_atomic_cas_weak_acq_rel(b, &old, mi_bfield_zero());
-}
+//static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b) {
+//  mi_bfield_t old = mi_bfield_all_set();
+//  return mi_atomic_cas_weak_acq_rel(b, &old, mi_bfield_zero());
+//}
 
 // Check if all bits corresponding to a mask are set.
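
Note: the now-unused `try_setX`/`try_clearX` rest on a single compare-and-swap that succeeds only when the field is exactly all-zeros (respectively all-ones). A sketch of the idiom with C11 atomics; `try_set_all` is a hypothetical stand-in, and like the source it uses a weak CAS, which may fail spuriously:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Succeed only if *b is currently all zeros, flipping it to all ones in one
// atomic step; any concurrent modification makes the CAS (and the call) fail.
static inline bool try_set_all(_Atomic(uint64_t)* b) {
  uint64_t expected = 0;
  return atomic_compare_exchange_weak_explicit(
      b, &expected, UINT64_MAX, memory_order_acq_rel, memory_order_acquire);
}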
@@ -328,7 +328,7 @@ static bool mi_bchunk_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size
     const mi_bfield_t mask = mi_bfield_mask(m, idx);
     size_t already_xset = 0;
     const bool transition = mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
-    mi_assert_internal((transition && already_xset == m) || (!transition && already_xset > 0));
+    mi_assert_internal((transition && already_xset == 0) || (!transition && already_xset > 0));
     all_transition = all_transition && transition;
    total_already_xset += already_xset;
    // next field
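
Note: the corrected assertion encodes the invariant that a full transition of the mask is only possible when none of the masked bits were set (or cleared) beforehand, so `already_xset` must be 0; the old form compared it to `m`, the mask's bit count, which contradicts a transition. A non-atomic model of the set case (illustrative only; `__builtin_popcountll` assumes GCC/Clang):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// The call is a full 0 -> 1 transition iff zero bits of the mask were
// already set before the OR.
static bool model_set_mask(uint64_t* field, uint64_t mask, size_t* already_set) {
  *already_set = (size_t)__builtin_popcountll(*field & mask);
  *field |= mask;
  return (*already_set == 0);
}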
@@ -605,9 +605,9 @@ static inline bool mi_bchunk_find_and_try_clear(mi_bchunk_t* chunk, size_t* pidx
   return mi_bchunk_find_and_try_xset(MI_BIT_CLEAR, chunk, pidx);
 }
 
-static inline bool mi_bchunk_find_and_try_set(mi_bchunk_t* chunk, size_t* pidx) {
-  return mi_bchunk_find_and_try_xset(MI_BIT_SET, chunk, pidx);
-}
+//static inline bool mi_bchunk_find_and_try_set(mi_bchunk_t* chunk, size_t* pidx) {
+//  return mi_bchunk_find_and_try_xset(MI_BIT_SET, chunk, pidx);
+//}
 
 // find least byte in a chunk with all bits set, and try unset it atomically
@@ -763,7 +763,7 @@ static inline bool mi_bchunk_all_are_clear_relaxed(mi_bchunk_t* chunk) {
   // a 64b cache-line contains the entire chunk anyway so load both at once
   const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
   const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
-  return (mi_mm256_is_zero(_mm256_or_epi64(vec1,vec2)));
+  return (mi_mm256_is_zero(_mm256_or_si256(vec1,vec2)));
   #else
   return mi_bchunk_all_are_clear(chunk);
   #endif
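
Note: the intrinsic swap is a portability fix. `_mm256_or_epi64` is an AVX-512VL intrinsic despite operating on 256-bit vectors, so it does not compile for AVX2-only targets, while `_mm256_or_si256` is plain AVX2 and bitwise-identical for this use. A sketch of an AVX2-only 64-byte zero test with a hypothetical helper name:

#include <immintrin.h>
#include <stdbool.h>

// OR the two 32-byte halves together and test the result for all-zero;
// needs only AVX2 (_mm256_or_si256) plus AVX (_mm256_testz_si256).
static inline bool all_zero_64bytes(const void* p) {
  const __m256i v1 = _mm256_loadu_si256((const __m256i*)p);
  const __m256i v2 = _mm256_loadu_si256(((const __m256i*)p) + 1);
  const __m256i v  = _mm256_or_si256(v1, v2);
  return _mm256_testz_si256(v, v) != 0;  // 1 iff all 256 bits are zero
}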
@@ -810,7 +810,7 @@ size_t mi_bitmap_size(size_t bit_count, size_t* pchunk_count) {
   mi_assert_internal(bit_count > 0);
   const size_t chunk_count = bit_count / MI_BCHUNK_BITS;
   mi_assert_internal(chunk_count >= 1);
-  const size_t size = sizeof(mi_bitmap_t) + ((chunk_count - 1) * MI_BCHUNK_SIZE);
+  const size_t size = offsetof(mi_bitmap_t,chunks) + (chunk_count * MI_BCHUNK_SIZE);
   mi_assert_internal( (size%MI_BCHUNK_SIZE) == 0 );
   if (pchunk_count != NULL) { *pchunk_count = chunk_count; }
   return size;
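
Note: the sizing fix matters once `chunks[]` is declared with a default length greater than one (see the header change below): `sizeof(mi_bitmap_t)` would then count all of the default chunks plus any trailing padding, while `offsetof` measures only the true header, independent of the declared array length. A sketch with simplified, hypothetical types:

#include <stddef.h>

// Simplified stand-in for mi_bitmap_t (hypothetical field layout).
typedef struct bitmap_s {
  size_t        chunk_count;
  unsigned char chunks[64][64];  // declared default; the real count is dynamic
} bitmap_t;

// Correct regardless of the declared chunks[] length or trailing padding.
static size_t bitmap_size(size_t chunk_count) {
  return offsetof(bitmap_t, chunks)
       + chunk_count * sizeof(((bitmap_t*)0)->chunks[0]);
}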
@@ -1044,10 +1044,10 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
     const size_t chunk_idx0 = i*MI_BFIELD_BITS; \
     mi_bfield_t cmap = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); \
     size_t cmap_idx_shift = 0;   /* shift through the cmap */ \
-    if (_i == 0) { cmap = mi_rotr(cmap, chunkmap_start_idx); cmap_idx_shift = chunkmap_start_idx; }  /* rotate right for the start position (on the first iteration) */ \
+    if (_i == 0) { cmap = mi_bfield_rotate_right(cmap, chunkmap_start_idx); cmap_idx_shift = chunkmap_start_idx; }  /* rotate right for the start position (on the first iteration) */ \
     \
     size_t cmap_idx; \
-    while (mi_bsf(cmap, &cmap_idx)) {  /* find least bit that is set */ \
+    while (mi_bfield_find_least_bit(cmap, &cmap_idx)) { \
       /* set the chunk idx */ \
       size_t name_chunk_idx = chunk_idx0 + ((cmap_idx + cmap_idx_shift) % MI_BFIELD_BITS); \
       mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap)); \
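
Note: this hunk only routes the macro through the named bfield helpers, but the rotate-then-scan trick itself is worth spelling out: rotating the chunkmap right by the start index lets the search wrap around from an arbitrary starting chunk while still using a cheap find-least-bit, mapping each hit back with a modulo. A standalone sketch (hypothetical names; `__builtin_ctzll` assumes GCC/Clang):

#include <stdint.h>

// Rotate right so bit `start` becomes bit 0 (guard r == 0 to avoid UB).
static inline uint64_t rotr64(uint64_t x, unsigned r) {
  return (r == 0) ? x : ((x >> r) | (x << (64 - r)));
}

// Visit set bits least-first, starting the wrap-around scan at `start`.
static void scan_from(uint64_t map, unsigned start,
                      void (*visit)(unsigned chunk_idx)) {
  uint64_t m = rotr64(map, start);
  while (m != 0) {
    const unsigned idx = (unsigned)__builtin_ctzll(m);  // least set bit
    visit((idx + start) % 64);                          // original bit position
    m &= (m - 1);                                       // clear that bit
  }
}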
@@ -1130,4 +1130,4 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx) {
   const size_t cidx = idx % MI_BCHUNK_BITS;
   mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
   mi_bchunk_clear_once_set(&bitmap->chunks[chunk_idx], cidx);
-}
+}

View file

@@ -75,12 +75,18 @@ typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bchunk_s {
 
 // The chunkmap is itself a chunk.
 typedef mi_bchunk_t  mi_bchunkmap_t;
-#define MI_BCHUNKMAP_BITS          MI_BCHUNK_BITS
-#define MI_BITMAP_MAX_CHUNK_COUNT  (MI_BCHUNKMAP_BITS)
-#define MI_BITMAP_MIN_CHUNK_COUNT  (1)
-#define MI_BITMAP_MAX_BIT_COUNT    (MI_BITMAP_MAX_CHUNK_COUNT * MI_BCHUNK_BITS)  // 16 GiB arena
-#define MI_BITMAP_MIN_BIT_COUNT    (MI_BITMAP_MIN_CHUNK_COUNT * MI_BCHUNK_BITS)  // 32 MiB arena
+#define MI_BCHUNKMAP_BITS             MI_BCHUNK_BITS
+#define MI_BITMAP_MAX_CHUNK_COUNT     (MI_BCHUNKMAP_BITS)
+#define MI_BITMAP_MIN_CHUNK_COUNT     (1)
+#if MI_SIZE_BITS > 32
+#define MI_BITMAP_DEFAULT_CHUNK_COUNT (64)  // 2 GiB on 64-bit -- this is for the page map
+#else
+#define MI_BITMAP_DEFAULT_CHUNK_COUNT (1)
+#endif
+#define MI_BITMAP_MAX_BIT_COUNT       (MI_BITMAP_MAX_CHUNK_COUNT * MI_BCHUNK_BITS)      // 16 GiB arena
+#define MI_BITMAP_MIN_BIT_COUNT       (MI_BITMAP_MIN_CHUNK_COUNT * MI_BCHUNK_BITS)      // 32 MiB arena
+#define MI_BITMAP_DEFAULT_BIT_COUNT   (MI_BITMAP_DEFAULT_CHUNK_COUNT * MI_BCHUNK_BITS)  // 2 GiB arena
 
 // An atomic bitmap
@@ -88,7 +94,7 @@ typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
   _Atomic(size_t)  chunk_count;  // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
   size_t           _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1];  // suppress warning on msvc
   mi_bchunkmap_t   chunkmap;
-  mi_bchunk_t      chunks[1];  // or more, up to MI_BITMAP_MAX_CHUNK_COUNT
+  mi_bchunk_t      chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT];  // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bitmap_t;
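
Note: growing `chunks[]` from 1 to the default count lets a bitmap be declared statically with enough room for the page map (see `mi_page_map_commit` below), while larger bitmaps are still allocated oversized via the `offsetof`-based `mi_bitmap_size`. A sketch of the two uses, again with simplified hypothetical types:

#include <stddef.h>
#include <stdlib.h>

typedef struct bitmap_s {        // simplified, hypothetical layout
  size_t        chunk_count;
  unsigned char chunks[64][64];  // default chunk count baked in
} bitmap_t;

static bitmap_t commit_map;      // static use: the 64 default chunks suffice

// Dynamic use: allocate storage for more chunks than the declared default.
static bitmap_t* bitmap_new(size_t chunk_count) {
  bitmap_t* bm = (bitmap_t*)calloc(1, offsetof(bitmap_t, chunks) + chunk_count * 64);
  if (bm != NULL) { bm->chunk_count = chunk_count; }
  return bm;
}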

View file

@@ -400,7 +400,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   tld->heap_backing = bheap;
   tld->heaps = NULL;
   tld->subproc = &mi_subproc_default;
-  tld->tseq = 0; // mi_atomic_add_acq_rel(&mi_tcount, 1);
+  tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1);
   tld->os.stats = &tld->stats;
 }
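
Note: the thread sequence number is live again: each thread's TLD now draws a distinct, dense id from a global atomic counter instead of the hardcoded 0 (presumably used to stripe threads across arenas when searching for free slices). The idiom as a sketch with hypothetical names:

#include <stdatomic.h>
#include <stddef.h>

static _Atomic(size_t) tcount;  // global thread counter

// Each initializing thread gets a distinct, monotonically assigned id.
static size_t next_tseq(void) {
  return atomic_fetch_add_explicit(&tcount, 1, memory_order_acq_rel);
}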

View file

@@ -13,7 +13,7 @@ mi_decl_cache_align uint8_t* _mi_page_map = NULL;
 static bool  mi_page_map_all_committed = false;
 static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
 static mi_memid_t mi_page_map_memid;
-static mi_bitmap_t mi_page_map_commit = { 1, MI_BITMAP_MIN_CHUNK_COUNT };
+static mi_bitmap_t mi_page_map_commit = { MI_BITMAP_DEFAULT_CHUNK_COUNT, { 0 }, { 0 }, { { 0 } } };
 
 bool _mi_page_map_init(void) {
   size_t vbits = _mi_os_virtual_address_bits();
@@ -22,10 +22,10 @@ bool _mi_page_map_init(void) {
   // 64 KiB for 4 GiB address space (on 32-bit)
   const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT));
 
-  mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size, MI_BITMAP_MIN_BIT_COUNT);
+  mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size, MI_BITMAP_DEFAULT_BIT_COUNT);
   // mi_bitmap_init(&mi_page_map_commit, MI_BITMAP_MIN_BIT_COUNT, true);
 
-  mi_page_map_all_committed = false; // _mi_os_has_overcommit(); // commit on-access on Linux systems?
+  mi_page_map_all_committed = (page_map_size <= 1*MI_MiB);  // _mi_os_has_overcommit(); // commit on-access on Linux systems?
   _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
   if (_mi_page_map==NULL) {
     _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
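
Note: small page maps (at most 1 MiB, i.e. 32-bit address spaces) are now committed eagerly, and for large ones each bit of `mi_page_map_commit` covers a coarser range. Rough numbers for the new granularity, derived from the comments in this diff (64 KiB slices, 512-bit chunks, 64 default chunks; treat these constants as assumptions):

#include <assert.h>
#include <stdint.h>

#define SLICE_SHIFT       16                              // 64 KiB arena slices
#define BCHUNK_BITS       512                             // one 64-byte chunk
#define DEFAULT_CHUNKS    64
#define DEFAULT_BIT_COUNT (DEFAULT_CHUNKS * BCHUNK_BITS)  // 32768 commit bits

int main(void) {
  // 48-bit address space: the page map needs 2^(48-16) = 4 GiB of entries.
  const uint64_t page_map_size  = (uint64_t)1 << (48 - SLICE_SHIFT);
  // Each commit bit then covers 4 GiB / 32768 = 128 KiB of page-map entries.
  const uint64_t per_commit_bit = page_map_size / DEFAULT_BIT_COUNT;
  assert(per_commit_bit == 128 * 1024);
  return 0;
}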

View file

@@ -41,11 +41,11 @@ static int THREADS = 8;
 static int SCALE   = 10;
 static int ITER    = 10;
 #elif 0
-static int THREADS = 1;
+static int THREADS = 4;
 static int SCALE   = 100;
 static int ITER    = 10;
 #define ALLOW_LARGE  false
-#elif 1
+#elif 0
 static int THREADS = 32;
 static int SCALE   = 50;
 static int ITER    = 50;
@@ -343,9 +343,9 @@ int main(int argc, char** argv) {
 #ifndef USE_STD_MALLOC
 #ifndef NDEBUG
-  mi_debug_show_arenas(true, true, false);
+  //mi_debug_show_arenas(true, true, false);
   mi_collect(true);
-  // mi_debug_show_arenas(true,true,false);
+  mi_debug_show_arenas(true,true,false);
 #endif
   // mi_stats_print(NULL);
 #else